{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 8802, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00034083162917518747, "grad_norm": 57.59091708666561, "learning_rate": 0.0, "loss": 5.8168, "step": 1 }, { "epoch": 0.0006816632583503749, "grad_norm": 35.504484478569786, "learning_rate": 1.1350737797956868e-08, "loss": 4.9348, "step": 2 }, { "epoch": 0.0010224948875255625, "grad_norm": 36.74505399565994, "learning_rate": 2.2701475595913736e-08, "loss": 4.5229, "step": 3 }, { "epoch": 0.0013633265167007499, "grad_norm": 31.028204839735388, "learning_rate": 3.40522133938706e-08, "loss": 4.4453, "step": 4 }, { "epoch": 0.0017041581458759373, "grad_norm": 42.971982705186356, "learning_rate": 4.540295119182747e-08, "loss": 5.0187, "step": 5 }, { "epoch": 0.002044989775051125, "grad_norm": 48.37098858867094, "learning_rate": 5.6753688989784336e-08, "loss": 4.6632, "step": 6 }, { "epoch": 0.0023858214042263124, "grad_norm": 41.05736206565161, "learning_rate": 6.81044267877412e-08, "loss": 5.0529, "step": 7 }, { "epoch": 0.0027266530334014998, "grad_norm": 30.895923414240883, "learning_rate": 7.945516458569808e-08, "loss": 4.097, "step": 8 }, { "epoch": 0.003067484662576687, "grad_norm": 28.139464264677446, "learning_rate": 9.080590238365495e-08, "loss": 4.0045, "step": 9 }, { "epoch": 0.0034083162917518746, "grad_norm": 56.60422298760434, "learning_rate": 1.0215664018161182e-07, "loss": 5.0767, "step": 10 }, { "epoch": 0.003749147920927062, "grad_norm": 32.58747616018831, "learning_rate": 1.1350737797956867e-07, "loss": 4.2204, "step": 11 }, { "epoch": 0.00408997955010225, "grad_norm": 77.12020113980064, "learning_rate": 1.2485811577752554e-07, "loss": 4.7817, "step": 12 }, { "epoch": 0.004430811179277437, "grad_norm": 38.578010149366236, "learning_rate": 1.362088535754824e-07, "loss": 4.1481, "step": 13 }, { "epoch": 0.004771642808452625, "grad_norm": 31.6737289164945, "learning_rate": 1.4755959137343928e-07, "loss": 4.6646, "step": 14 }, { "epoch": 0.005112474437627812, "grad_norm": 41.31979076745021, "learning_rate": 1.5891032917139615e-07, "loss": 4.1875, "step": 15 }, { "epoch": 0.0054533060668029995, "grad_norm": 31.343917032598256, "learning_rate": 1.7026106696935302e-07, "loss": 4.6402, "step": 16 }, { "epoch": 0.005794137695978187, "grad_norm": 39.75586604612425, "learning_rate": 1.816118047673099e-07, "loss": 4.9286, "step": 17 }, { "epoch": 0.006134969325153374, "grad_norm": 46.170048454435346, "learning_rate": 1.9296254256526676e-07, "loss": 5.4478, "step": 18 }, { "epoch": 0.006475800954328562, "grad_norm": 50.34261010053117, "learning_rate": 2.0431328036322363e-07, "loss": 4.3283, "step": 19 }, { "epoch": 0.006816632583503749, "grad_norm": 34.45048910040835, "learning_rate": 2.156640181611805e-07, "loss": 4.4405, "step": 20 }, { "epoch": 0.007157464212678937, "grad_norm": 29.228334505764643, "learning_rate": 2.2701475595913734e-07, "loss": 3.7926, "step": 21 }, { "epoch": 0.007498295841854124, "grad_norm": 43.094353346124514, "learning_rate": 2.3836549375709421e-07, "loss": 5.2052, "step": 22 }, { "epoch": 0.007839127471029312, "grad_norm": 49.11282664435778, "learning_rate": 2.497162315550511e-07, "loss": 5.0791, "step": 23 }, { "epoch": 0.0081799591002045, "grad_norm": 32.96859290994525, "learning_rate": 2.61066969353008e-07, "loss": 4.5298, "step": 24 }, { "epoch": 0.008520790729379687, "grad_norm": 38.613399659302985, "learning_rate": 2.724177071509648e-07, "loss": 4.6864, "step": 25 }, { "epoch": 0.008861622358554875, "grad_norm": 36.93695516496745, "learning_rate": 2.837684449489217e-07, "loss": 4.3844, "step": 26 }, { "epoch": 0.009202453987730062, "grad_norm": 42.6721215467686, "learning_rate": 2.9511918274687856e-07, "loss": 5.1473, "step": 27 }, { "epoch": 0.00954328561690525, "grad_norm": 57.352269402058106, "learning_rate": 3.064699205448354e-07, "loss": 4.2618, "step": 28 }, { "epoch": 0.009884117246080437, "grad_norm": 55.365441413724994, "learning_rate": 3.178206583427923e-07, "loss": 5.0097, "step": 29 }, { "epoch": 0.010224948875255624, "grad_norm": 25.35316815868296, "learning_rate": 3.2917139614074915e-07, "loss": 3.9989, "step": 30 }, { "epoch": 0.010565780504430812, "grad_norm": 34.23714511262736, "learning_rate": 3.4052213393870604e-07, "loss": 4.2315, "step": 31 }, { "epoch": 0.010906612133605999, "grad_norm": 55.82043410873589, "learning_rate": 3.518728717366629e-07, "loss": 6.0994, "step": 32 }, { "epoch": 0.011247443762781187, "grad_norm": 48.619073461652675, "learning_rate": 3.632236095346198e-07, "loss": 4.8489, "step": 33 }, { "epoch": 0.011588275391956374, "grad_norm": 37.56783465614343, "learning_rate": 3.745743473325766e-07, "loss": 4.7175, "step": 34 }, { "epoch": 0.011929107021131561, "grad_norm": 50.21177513772905, "learning_rate": 3.859250851305335e-07, "loss": 5.3359, "step": 35 }, { "epoch": 0.012269938650306749, "grad_norm": 44.46325523693967, "learning_rate": 3.9727582292849036e-07, "loss": 5.4258, "step": 36 }, { "epoch": 0.012610770279481936, "grad_norm": 27.459314920084783, "learning_rate": 4.0862656072644726e-07, "loss": 3.8282, "step": 37 }, { "epoch": 0.012951601908657124, "grad_norm": 44.14694629863951, "learning_rate": 4.199772985244041e-07, "loss": 4.1953, "step": 38 }, { "epoch": 0.013292433537832311, "grad_norm": 38.907810004842695, "learning_rate": 4.31328036322361e-07, "loss": 4.8621, "step": 39 }, { "epoch": 0.013633265167007498, "grad_norm": 33.0510438495151, "learning_rate": 4.4267877412031784e-07, "loss": 4.236, "step": 40 }, { "epoch": 0.013974096796182686, "grad_norm": 33.22668720989239, "learning_rate": 4.540295119182747e-07, "loss": 4.5137, "step": 41 }, { "epoch": 0.014314928425357873, "grad_norm": 78.71170207820887, "learning_rate": 4.653802497162316e-07, "loss": 5.724, "step": 42 }, { "epoch": 0.01465576005453306, "grad_norm": 59.544779257903116, "learning_rate": 4.7673098751418843e-07, "loss": 4.6553, "step": 43 }, { "epoch": 0.014996591683708248, "grad_norm": 32.149479771440475, "learning_rate": 4.880817253121453e-07, "loss": 4.6192, "step": 44 }, { "epoch": 0.015337423312883436, "grad_norm": 32.345267433240735, "learning_rate": 4.994324631101022e-07, "loss": 4.1883, "step": 45 }, { "epoch": 0.015678254942058625, "grad_norm": 38.79489670330069, "learning_rate": 5.10783200908059e-07, "loss": 4.8076, "step": 46 }, { "epoch": 0.016019086571233812, "grad_norm": 40.16376866120035, "learning_rate": 5.22133938706016e-07, "loss": 5.3198, "step": 47 }, { "epoch": 0.016359918200409, "grad_norm": 44.24017598075992, "learning_rate": 5.334846765039728e-07, "loss": 4.7539, "step": 48 }, { "epoch": 0.016700749829584187, "grad_norm": 32.10280391859119, "learning_rate": 5.448354143019296e-07, "loss": 4.3121, "step": 49 }, { "epoch": 0.017041581458759374, "grad_norm": 36.76858463688012, "learning_rate": 5.561861520998865e-07, "loss": 3.928, "step": 50 }, { "epoch": 0.017382413087934562, "grad_norm": 24.226358240965236, "learning_rate": 5.675368898978434e-07, "loss": 3.7659, "step": 51 }, { "epoch": 0.01772324471710975, "grad_norm": 47.50595317038215, "learning_rate": 5.788876276958003e-07, "loss": 4.8861, "step": 52 }, { "epoch": 0.018064076346284937, "grad_norm": 36.86532166495527, "learning_rate": 5.902383654937571e-07, "loss": 4.6835, "step": 53 }, { "epoch": 0.018404907975460124, "grad_norm": 35.76471915382078, "learning_rate": 6.01589103291714e-07, "loss": 3.8985, "step": 54 }, { "epoch": 0.01874573960463531, "grad_norm": 29.80717279427096, "learning_rate": 6.129398410896708e-07, "loss": 4.3766, "step": 55 }, { "epoch": 0.0190865712338105, "grad_norm": 24.044222742554417, "learning_rate": 6.242905788876278e-07, "loss": 3.9505, "step": 56 }, { "epoch": 0.019427402862985686, "grad_norm": 30.03966098559725, "learning_rate": 6.356413166855846e-07, "loss": 4.0955, "step": 57 }, { "epoch": 0.019768234492160874, "grad_norm": 32.04685142968214, "learning_rate": 6.469920544835414e-07, "loss": 4.6116, "step": 58 }, { "epoch": 0.02010906612133606, "grad_norm": 41.7555405534856, "learning_rate": 6.583427922814983e-07, "loss": 5.0312, "step": 59 }, { "epoch": 0.02044989775051125, "grad_norm": 29.574044326552514, "learning_rate": 6.696935300794552e-07, "loss": 3.8183, "step": 60 }, { "epoch": 0.020790729379686436, "grad_norm": 35.383343479813675, "learning_rate": 6.810442678774121e-07, "loss": 4.0759, "step": 61 }, { "epoch": 0.021131561008861623, "grad_norm": 28.865446702738982, "learning_rate": 6.923950056753689e-07, "loss": 4.2317, "step": 62 }, { "epoch": 0.02147239263803681, "grad_norm": 55.21760634206572, "learning_rate": 7.037457434733258e-07, "loss": 5.0048, "step": 63 }, { "epoch": 0.021813224267211998, "grad_norm": 39.28656961750482, "learning_rate": 7.150964812712827e-07, "loss": 4.2169, "step": 64 }, { "epoch": 0.022154055896387186, "grad_norm": 37.67836056485393, "learning_rate": 7.264472190692396e-07, "loss": 4.6545, "step": 65 }, { "epoch": 0.022494887525562373, "grad_norm": 54.93570670381075, "learning_rate": 7.377979568671964e-07, "loss": 4.0702, "step": 66 }, { "epoch": 0.02283571915473756, "grad_norm": 31.657977048597918, "learning_rate": 7.491486946651533e-07, "loss": 4.2067, "step": 67 }, { "epoch": 0.023176550783912748, "grad_norm": 46.4832275644821, "learning_rate": 7.604994324631101e-07, "loss": 5.0296, "step": 68 }, { "epoch": 0.023517382413087935, "grad_norm": 18.514303480457592, "learning_rate": 7.71850170261067e-07, "loss": 3.7887, "step": 69 }, { "epoch": 0.023858214042263123, "grad_norm": 42.1389573294111, "learning_rate": 7.832009080590239e-07, "loss": 4.9366, "step": 70 }, { "epoch": 0.02419904567143831, "grad_norm": 40.10025378505633, "learning_rate": 7.945516458569807e-07, "loss": 4.1243, "step": 71 }, { "epoch": 0.024539877300613498, "grad_norm": 78.92088848716773, "learning_rate": 8.059023836549376e-07, "loss": 4.6885, "step": 72 }, { "epoch": 0.024880708929788685, "grad_norm": 26.317292561023415, "learning_rate": 8.172531214528945e-07, "loss": 4.0815, "step": 73 }, { "epoch": 0.025221540558963872, "grad_norm": 27.519678827637833, "learning_rate": 8.286038592508514e-07, "loss": 3.7988, "step": 74 }, { "epoch": 0.02556237218813906, "grad_norm": 35.81082589311988, "learning_rate": 8.399545970488082e-07, "loss": 4.2285, "step": 75 }, { "epoch": 0.025903203817314247, "grad_norm": 36.77375833062312, "learning_rate": 8.51305334846765e-07, "loss": 4.6566, "step": 76 }, { "epoch": 0.026244035446489435, "grad_norm": 29.703379585457384, "learning_rate": 8.62656072644722e-07, "loss": 3.7432, "step": 77 }, { "epoch": 0.026584867075664622, "grad_norm": 36.956499057588225, "learning_rate": 8.740068104426788e-07, "loss": 3.8635, "step": 78 }, { "epoch": 0.02692569870483981, "grad_norm": 48.83904439087653, "learning_rate": 8.853575482406357e-07, "loss": 4.6102, "step": 79 }, { "epoch": 0.027266530334014997, "grad_norm": 36.868465057690344, "learning_rate": 8.967082860385925e-07, "loss": 4.4035, "step": 80 }, { "epoch": 0.027607361963190184, "grad_norm": 24.28111065178101, "learning_rate": 9.080590238365494e-07, "loss": 3.5808, "step": 81 }, { "epoch": 0.02794819359236537, "grad_norm": 41.37504780475601, "learning_rate": 9.194097616345063e-07, "loss": 4.4832, "step": 82 }, { "epoch": 0.02828902522154056, "grad_norm": 38.62655025478103, "learning_rate": 9.307604994324632e-07, "loss": 4.2528, "step": 83 }, { "epoch": 0.028629856850715747, "grad_norm": 50.33164580552034, "learning_rate": 9.4211123723042e-07, "loss": 3.8279, "step": 84 }, { "epoch": 0.028970688479890934, "grad_norm": 27.87148676420042, "learning_rate": 9.534619750283769e-07, "loss": 4.2272, "step": 85 }, { "epoch": 0.02931152010906612, "grad_norm": 24.143089590660246, "learning_rate": 9.648127128263338e-07, "loss": 3.9701, "step": 86 }, { "epoch": 0.02965235173824131, "grad_norm": 28.328589618319814, "learning_rate": 9.761634506242906e-07, "loss": 3.7022, "step": 87 }, { "epoch": 0.029993183367416496, "grad_norm": 30.03347409403586, "learning_rate": 9.875141884222475e-07, "loss": 3.482, "step": 88 }, { "epoch": 0.030334014996591684, "grad_norm": 45.232130952103994, "learning_rate": 9.988649262202043e-07, "loss": 4.8248, "step": 89 }, { "epoch": 0.03067484662576687, "grad_norm": 60.78338113303006, "learning_rate": 1.0102156640181612e-06, "loss": 4.8718, "step": 90 }, { "epoch": 0.03101567825494206, "grad_norm": 30.229593062195566, "learning_rate": 1.021566401816118e-06, "loss": 3.9943, "step": 91 }, { "epoch": 0.03135650988411725, "grad_norm": 22.47447511492927, "learning_rate": 1.032917139614075e-06, "loss": 3.76, "step": 92 }, { "epoch": 0.03169734151329243, "grad_norm": 65.43300672545271, "learning_rate": 1.044267877412032e-06, "loss": 4.3451, "step": 93 }, { "epoch": 0.032038173142467624, "grad_norm": 50.831219968265835, "learning_rate": 1.0556186152099888e-06, "loss": 4.606, "step": 94 }, { "epoch": 0.03237900477164281, "grad_norm": 41.0184461017877, "learning_rate": 1.0669693530079456e-06, "loss": 4.0382, "step": 95 }, { "epoch": 0.032719836400818, "grad_norm": 240.12359152440698, "learning_rate": 1.0783200908059024e-06, "loss": 3.733, "step": 96 }, { "epoch": 0.03306066802999318, "grad_norm": 32.320952547613125, "learning_rate": 1.0896708286038593e-06, "loss": 3.6696, "step": 97 }, { "epoch": 0.033401499659168374, "grad_norm": 35.57153441622058, "learning_rate": 1.1010215664018161e-06, "loss": 4.4483, "step": 98 }, { "epoch": 0.03374233128834356, "grad_norm": 49.18224083229904, "learning_rate": 1.112372304199773e-06, "loss": 4.2309, "step": 99 }, { "epoch": 0.03408316291751875, "grad_norm": 21.29971713894319, "learning_rate": 1.1237230419977298e-06, "loss": 3.5143, "step": 100 }, { "epoch": 0.03442399454669393, "grad_norm": 26.171300853316573, "learning_rate": 1.1350737797956869e-06, "loss": 3.5543, "step": 101 }, { "epoch": 0.034764826175869123, "grad_norm": 44.086782963499445, "learning_rate": 1.1464245175936437e-06, "loss": 4.1766, "step": 102 }, { "epoch": 0.03510565780504431, "grad_norm": 30.896609732705638, "learning_rate": 1.1577752553916006e-06, "loss": 4.1972, "step": 103 }, { "epoch": 0.0354464894342195, "grad_norm": 37.03141053528742, "learning_rate": 1.1691259931895574e-06, "loss": 4.2849, "step": 104 }, { "epoch": 0.03578732106339468, "grad_norm": 59.63533148292523, "learning_rate": 1.1804767309875143e-06, "loss": 4.6286, "step": 105 }, { "epoch": 0.03612815269256987, "grad_norm": 20.955901686710448, "learning_rate": 1.191827468785471e-06, "loss": 3.4592, "step": 106 }, { "epoch": 0.03646898432174506, "grad_norm": 22.39571713768874, "learning_rate": 1.203178206583428e-06, "loss": 3.4562, "step": 107 }, { "epoch": 0.03680981595092025, "grad_norm": 29.79417253459404, "learning_rate": 1.2145289443813848e-06, "loss": 3.8712, "step": 108 }, { "epoch": 0.03715064758009543, "grad_norm": 32.09391274068315, "learning_rate": 1.2258796821793416e-06, "loss": 3.9436, "step": 109 }, { "epoch": 0.03749147920927062, "grad_norm": 36.11220965457796, "learning_rate": 1.2372304199772987e-06, "loss": 3.8297, "step": 110 }, { "epoch": 0.03783231083844581, "grad_norm": 76.90999993153758, "learning_rate": 1.2485811577752555e-06, "loss": 3.3499, "step": 111 }, { "epoch": 0.038173142467621, "grad_norm": 38.51303611362027, "learning_rate": 1.2599318955732124e-06, "loss": 4.1105, "step": 112 }, { "epoch": 0.03851397409679618, "grad_norm": 26.591400688979366, "learning_rate": 1.2712826333711692e-06, "loss": 3.9424, "step": 113 }, { "epoch": 0.03885480572597137, "grad_norm": 23.138254071007996, "learning_rate": 1.282633371169126e-06, "loss": 3.4484, "step": 114 }, { "epoch": 0.039195637355146556, "grad_norm": 36.38350498197628, "learning_rate": 1.293984108967083e-06, "loss": 3.9138, "step": 115 }, { "epoch": 0.03953646898432175, "grad_norm": 29.970820256623774, "learning_rate": 1.3053348467650397e-06, "loss": 3.567, "step": 116 }, { "epoch": 0.03987730061349693, "grad_norm": 22.310364536411072, "learning_rate": 1.3166855845629966e-06, "loss": 3.4333, "step": 117 }, { "epoch": 0.04021813224267212, "grad_norm": 29.707320103064053, "learning_rate": 1.3280363223609536e-06, "loss": 4.0237, "step": 118 }, { "epoch": 0.040558963871847306, "grad_norm": 23.363929093222016, "learning_rate": 1.3393870601589105e-06, "loss": 3.5037, "step": 119 }, { "epoch": 0.0408997955010225, "grad_norm": 28.76695583301503, "learning_rate": 1.3507377979568673e-06, "loss": 3.988, "step": 120 }, { "epoch": 0.04124062713019768, "grad_norm": 45.20691980022229, "learning_rate": 1.3620885357548242e-06, "loss": 4.9047, "step": 121 }, { "epoch": 0.04158145875937287, "grad_norm": 39.28631850070014, "learning_rate": 1.373439273552781e-06, "loss": 4.4538, "step": 122 }, { "epoch": 0.041922290388548056, "grad_norm": 31.922807313665288, "learning_rate": 1.3847900113507379e-06, "loss": 4.317, "step": 123 }, { "epoch": 0.04226312201772325, "grad_norm": 26.063870747424055, "learning_rate": 1.3961407491486947e-06, "loss": 3.2997, "step": 124 }, { "epoch": 0.04260395364689843, "grad_norm": 31.9355859080573, "learning_rate": 1.4074914869466515e-06, "loss": 3.768, "step": 125 }, { "epoch": 0.04294478527607362, "grad_norm": 33.89876388844045, "learning_rate": 1.4188422247446084e-06, "loss": 4.0892, "step": 126 }, { "epoch": 0.043285616905248805, "grad_norm": 23.89289470929762, "learning_rate": 1.4301929625425654e-06, "loss": 3.4874, "step": 127 }, { "epoch": 0.043626448534423996, "grad_norm": 20.54350929250958, "learning_rate": 1.4415437003405223e-06, "loss": 3.6092, "step": 128 }, { "epoch": 0.04396728016359918, "grad_norm": 36.61796838886808, "learning_rate": 1.4528944381384791e-06, "loss": 4.333, "step": 129 }, { "epoch": 0.04430811179277437, "grad_norm": 21.070851523451854, "learning_rate": 1.464245175936436e-06, "loss": 2.9482, "step": 130 }, { "epoch": 0.044648943421949555, "grad_norm": 24.326320234915727, "learning_rate": 1.4755959137343928e-06, "loss": 3.463, "step": 131 }, { "epoch": 0.044989775051124746, "grad_norm": 51.98180724699175, "learning_rate": 1.4869466515323497e-06, "loss": 4.7931, "step": 132 }, { "epoch": 0.04533060668029993, "grad_norm": 22.702762626979833, "learning_rate": 1.4982973893303065e-06, "loss": 3.8917, "step": 133 }, { "epoch": 0.04567143830947512, "grad_norm": 60.05517275809835, "learning_rate": 1.5096481271282633e-06, "loss": 5.0576, "step": 134 }, { "epoch": 0.046012269938650305, "grad_norm": 37.619537007165945, "learning_rate": 1.5209988649262202e-06, "loss": 4.4647, "step": 135 }, { "epoch": 0.046353101567825496, "grad_norm": 27.274330826725596, "learning_rate": 1.5323496027241772e-06, "loss": 3.7104, "step": 136 }, { "epoch": 0.04669393319700068, "grad_norm": 35.937893531424805, "learning_rate": 1.543700340522134e-06, "loss": 3.8603, "step": 137 }, { "epoch": 0.04703476482617587, "grad_norm": 23.848134320515413, "learning_rate": 1.555051078320091e-06, "loss": 3.8803, "step": 138 }, { "epoch": 0.047375596455351054, "grad_norm": 20.787555469755514, "learning_rate": 1.5664018161180478e-06, "loss": 3.3743, "step": 139 }, { "epoch": 0.047716428084526245, "grad_norm": 42.73142734647558, "learning_rate": 1.5777525539160046e-06, "loss": 5.0564, "step": 140 }, { "epoch": 0.04805725971370143, "grad_norm": 22.330600936219067, "learning_rate": 1.5891032917139615e-06, "loss": 3.0635, "step": 141 }, { "epoch": 0.04839809134287662, "grad_norm": 18.87190554235252, "learning_rate": 1.6004540295119183e-06, "loss": 3.7781, "step": 142 }, { "epoch": 0.048738922972051804, "grad_norm": 34.798453927798896, "learning_rate": 1.6118047673098751e-06, "loss": 3.9081, "step": 143 }, { "epoch": 0.049079754601226995, "grad_norm": 54.447807214847195, "learning_rate": 1.6231555051078322e-06, "loss": 4.1109, "step": 144 }, { "epoch": 0.04942058623040218, "grad_norm": 28.968541777599967, "learning_rate": 1.634506242905789e-06, "loss": 3.8615, "step": 145 }, { "epoch": 0.04976141785957737, "grad_norm": 47.44566987070836, "learning_rate": 1.6458569807037459e-06, "loss": 4.1951, "step": 146 }, { "epoch": 0.050102249488752554, "grad_norm": 28.43698837152208, "learning_rate": 1.6572077185017027e-06, "loss": 3.9544, "step": 147 }, { "epoch": 0.050443081117927745, "grad_norm": 25.542114310264832, "learning_rate": 1.6685584562996596e-06, "loss": 3.3524, "step": 148 }, { "epoch": 0.05078391274710293, "grad_norm": 38.53664218283615, "learning_rate": 1.6799091940976164e-06, "loss": 4.4971, "step": 149 }, { "epoch": 0.05112474437627812, "grad_norm": 25.138641206315906, "learning_rate": 1.6912599318955733e-06, "loss": 3.5213, "step": 150 }, { "epoch": 0.0514655760054533, "grad_norm": 21.143745115155863, "learning_rate": 1.70261066969353e-06, "loss": 3.3219, "step": 151 }, { "epoch": 0.051806407634628494, "grad_norm": 46.68950919557165, "learning_rate": 1.713961407491487e-06, "loss": 4.077, "step": 152 }, { "epoch": 0.05214723926380368, "grad_norm": 38.39616065933547, "learning_rate": 1.725312145289444e-06, "loss": 3.6285, "step": 153 }, { "epoch": 0.05248807089297887, "grad_norm": 24.216574675911076, "learning_rate": 1.7366628830874008e-06, "loss": 3.3737, "step": 154 }, { "epoch": 0.05282890252215405, "grad_norm": 37.23784866695728, "learning_rate": 1.7480136208853577e-06, "loss": 4.2556, "step": 155 }, { "epoch": 0.053169734151329244, "grad_norm": 47.363117515136565, "learning_rate": 1.7593643586833145e-06, "loss": 3.5448, "step": 156 }, { "epoch": 0.05351056578050443, "grad_norm": 40.1060800000987, "learning_rate": 1.7707150964812714e-06, "loss": 4.2298, "step": 157 }, { "epoch": 0.05385139740967962, "grad_norm": 29.02274262213852, "learning_rate": 1.7820658342792282e-06, "loss": 3.5804, "step": 158 }, { "epoch": 0.0541922290388548, "grad_norm": 61.39155334138686, "learning_rate": 1.793416572077185e-06, "loss": 4.2972, "step": 159 }, { "epoch": 0.054533060668029994, "grad_norm": 60.74957472369227, "learning_rate": 1.804767309875142e-06, "loss": 3.7249, "step": 160 }, { "epoch": 0.05487389229720518, "grad_norm": 55.75039324256153, "learning_rate": 1.8161180476730988e-06, "loss": 3.6803, "step": 161 }, { "epoch": 0.05521472392638037, "grad_norm": 26.772128245163103, "learning_rate": 1.8274687854710558e-06, "loss": 3.5408, "step": 162 }, { "epoch": 0.05555555555555555, "grad_norm": 64.26565595342448, "learning_rate": 1.8388195232690126e-06, "loss": 3.8707, "step": 163 }, { "epoch": 0.05589638718473074, "grad_norm": 27.285922054710852, "learning_rate": 1.8501702610669695e-06, "loss": 3.3474, "step": 164 }, { "epoch": 0.05623721881390593, "grad_norm": 38.94204926018756, "learning_rate": 1.8615209988649263e-06, "loss": 3.5277, "step": 165 }, { "epoch": 0.05657805044308112, "grad_norm": 35.44798550410898, "learning_rate": 1.8728717366628832e-06, "loss": 3.8651, "step": 166 }, { "epoch": 0.0569188820722563, "grad_norm": 28.45287269341736, "learning_rate": 1.88422247446084e-06, "loss": 3.5238, "step": 167 }, { "epoch": 0.05725971370143149, "grad_norm": 20.069880891877332, "learning_rate": 1.8955732122587969e-06, "loss": 3.1765, "step": 168 }, { "epoch": 0.05760054533060668, "grad_norm": 23.588129348117036, "learning_rate": 1.9069239500567537e-06, "loss": 3.1477, "step": 169 }, { "epoch": 0.05794137695978187, "grad_norm": 55.43248756046462, "learning_rate": 1.9182746878547106e-06, "loss": 3.8716, "step": 170 }, { "epoch": 0.05828220858895705, "grad_norm": 22.508382291578755, "learning_rate": 1.9296254256526676e-06, "loss": 3.6909, "step": 171 }, { "epoch": 0.05862304021813224, "grad_norm": 23.67494255734024, "learning_rate": 1.9409761634506242e-06, "loss": 3.7349, "step": 172 }, { "epoch": 0.058963871847307434, "grad_norm": 20.435206992531167, "learning_rate": 1.9523269012485813e-06, "loss": 3.7444, "step": 173 }, { "epoch": 0.05930470347648262, "grad_norm": 27.359592139227342, "learning_rate": 1.963677639046538e-06, "loss": 3.4833, "step": 174 }, { "epoch": 0.05964553510565781, "grad_norm": 41.9918720227676, "learning_rate": 1.975028376844495e-06, "loss": 4.2869, "step": 175 }, { "epoch": 0.05998636673483299, "grad_norm": 24.985571923477423, "learning_rate": 1.986379114642452e-06, "loss": 3.5109, "step": 176 }, { "epoch": 0.06032719836400818, "grad_norm": 18.932698454780553, "learning_rate": 1.9977298524404087e-06, "loss": 3.5433, "step": 177 }, { "epoch": 0.06066802999318337, "grad_norm": 28.394253800610453, "learning_rate": 2.0090805902383657e-06, "loss": 3.3059, "step": 178 }, { "epoch": 0.06100886162235856, "grad_norm": 28.765191774016984, "learning_rate": 2.0204313280363224e-06, "loss": 3.3615, "step": 179 }, { "epoch": 0.06134969325153374, "grad_norm": 21.100033059413505, "learning_rate": 2.0317820658342794e-06, "loss": 3.2112, "step": 180 }, { "epoch": 0.06169052488070893, "grad_norm": 55.569110386457744, "learning_rate": 2.043132803632236e-06, "loss": 3.3645, "step": 181 }, { "epoch": 0.06203135650988412, "grad_norm": 53.56201188920185, "learning_rate": 2.054483541430193e-06, "loss": 4.3261, "step": 182 }, { "epoch": 0.06237218813905931, "grad_norm": 25.61567407848872, "learning_rate": 2.06583427922815e-06, "loss": 3.6449, "step": 183 }, { "epoch": 0.0627130197682345, "grad_norm": 41.31805103738482, "learning_rate": 2.0771850170261068e-06, "loss": 3.7219, "step": 184 }, { "epoch": 0.06305385139740968, "grad_norm": 21.4036248134155, "learning_rate": 2.088535754824064e-06, "loss": 3.2239, "step": 185 }, { "epoch": 0.06339468302658487, "grad_norm": 31.385442932622837, "learning_rate": 2.0998864926220205e-06, "loss": 3.9935, "step": 186 }, { "epoch": 0.06373551465576005, "grad_norm": 54.646220204239526, "learning_rate": 2.1112372304199775e-06, "loss": 3.4902, "step": 187 }, { "epoch": 0.06407634628493525, "grad_norm": 27.29704402724134, "learning_rate": 2.122587968217934e-06, "loss": 3.4634, "step": 188 }, { "epoch": 0.06441717791411043, "grad_norm": 48.73053545881037, "learning_rate": 2.1339387060158912e-06, "loss": 3.5613, "step": 189 }, { "epoch": 0.06475800954328562, "grad_norm": 22.450334845074032, "learning_rate": 2.145289443813848e-06, "loss": 3.7506, "step": 190 }, { "epoch": 0.0650988411724608, "grad_norm": 30.202422660728864, "learning_rate": 2.156640181611805e-06, "loss": 3.1018, "step": 191 }, { "epoch": 0.065439672801636, "grad_norm": 31.806702268103646, "learning_rate": 2.167990919409762e-06, "loss": 3.5866, "step": 192 }, { "epoch": 0.06578050443081118, "grad_norm": 30.308620533555548, "learning_rate": 2.1793416572077186e-06, "loss": 3.505, "step": 193 }, { "epoch": 0.06612133605998637, "grad_norm": 20.514870763856617, "learning_rate": 2.1906923950056756e-06, "loss": 3.6375, "step": 194 }, { "epoch": 0.06646216768916155, "grad_norm": 45.348106445490934, "learning_rate": 2.2020431328036323e-06, "loss": 3.7995, "step": 195 }, { "epoch": 0.06680299931833675, "grad_norm": 23.13881631594261, "learning_rate": 2.2133938706015893e-06, "loss": 3.3924, "step": 196 }, { "epoch": 0.06714383094751193, "grad_norm": 64.19228314441158, "learning_rate": 2.224744608399546e-06, "loss": 3.4198, "step": 197 }, { "epoch": 0.06748466257668712, "grad_norm": 63.01653089024398, "learning_rate": 2.236095346197503e-06, "loss": 4.098, "step": 198 }, { "epoch": 0.0678254942058623, "grad_norm": 16.849431919196444, "learning_rate": 2.2474460839954596e-06, "loss": 3.4506, "step": 199 }, { "epoch": 0.0681663258350375, "grad_norm": 21.187719457127443, "learning_rate": 2.2587968217934167e-06, "loss": 3.2854, "step": 200 }, { "epoch": 0.06850715746421268, "grad_norm": 23.220234293356746, "learning_rate": 2.2701475595913738e-06, "loss": 3.4267, "step": 201 }, { "epoch": 0.06884798909338787, "grad_norm": 35.08164526356085, "learning_rate": 2.2814982973893304e-06, "loss": 3.8705, "step": 202 }, { "epoch": 0.06918882072256305, "grad_norm": 39.076967623239256, "learning_rate": 2.2928490351872874e-06, "loss": 4.2538, "step": 203 }, { "epoch": 0.06952965235173825, "grad_norm": 21.065106342686978, "learning_rate": 2.304199772985244e-06, "loss": 3.4893, "step": 204 }, { "epoch": 0.06987048398091343, "grad_norm": 44.39646906269018, "learning_rate": 2.315550510783201e-06, "loss": 3.2634, "step": 205 }, { "epoch": 0.07021131561008861, "grad_norm": 17.351855017282148, "learning_rate": 2.3269012485811578e-06, "loss": 2.9929, "step": 206 }, { "epoch": 0.0705521472392638, "grad_norm": 49.86759205744471, "learning_rate": 2.338251986379115e-06, "loss": 3.9951, "step": 207 }, { "epoch": 0.070892978868439, "grad_norm": 39.44895992366927, "learning_rate": 2.3496027241770714e-06, "loss": 4.055, "step": 208 }, { "epoch": 0.07123381049761418, "grad_norm": 26.558574772267797, "learning_rate": 2.3609534619750285e-06, "loss": 3.2005, "step": 209 }, { "epoch": 0.07157464212678936, "grad_norm": 23.124943203956146, "learning_rate": 2.3723041997729856e-06, "loss": 3.4104, "step": 210 }, { "epoch": 0.07191547375596455, "grad_norm": 23.54306669958745, "learning_rate": 2.383654937570942e-06, "loss": 3.3117, "step": 211 }, { "epoch": 0.07225630538513975, "grad_norm": 40.676904697274544, "learning_rate": 2.3950056753688992e-06, "loss": 3.9221, "step": 212 }, { "epoch": 0.07259713701431493, "grad_norm": 32.80530857035327, "learning_rate": 2.406356413166856e-06, "loss": 3.7483, "step": 213 }, { "epoch": 0.07293796864349011, "grad_norm": 29.281875034756915, "learning_rate": 2.417707150964813e-06, "loss": 3.3878, "step": 214 }, { "epoch": 0.0732788002726653, "grad_norm": 20.6540063172641, "learning_rate": 2.4290578887627696e-06, "loss": 3.557, "step": 215 }, { "epoch": 0.0736196319018405, "grad_norm": 46.435644939486906, "learning_rate": 2.4404086265607266e-06, "loss": 3.7634, "step": 216 }, { "epoch": 0.07396046353101568, "grad_norm": 25.126693434874223, "learning_rate": 2.4517593643586832e-06, "loss": 3.6086, "step": 217 }, { "epoch": 0.07430129516019086, "grad_norm": 21.810454348790316, "learning_rate": 2.4631101021566403e-06, "loss": 3.3155, "step": 218 }, { "epoch": 0.07464212678936605, "grad_norm": 53.09160991385009, "learning_rate": 2.4744608399545974e-06, "loss": 3.2462, "step": 219 }, { "epoch": 0.07498295841854125, "grad_norm": 32.501693254674706, "learning_rate": 2.485811577752554e-06, "loss": 3.431, "step": 220 }, { "epoch": 0.07532379004771643, "grad_norm": 21.935563148720192, "learning_rate": 2.497162315550511e-06, "loss": 3.1367, "step": 221 }, { "epoch": 0.07566462167689161, "grad_norm": 42.9385355500167, "learning_rate": 2.508513053348468e-06, "loss": 3.0902, "step": 222 }, { "epoch": 0.0760054533060668, "grad_norm": 21.246872285049584, "learning_rate": 2.5198637911464247e-06, "loss": 3.5296, "step": 223 }, { "epoch": 0.076346284935242, "grad_norm": 29.521731400093564, "learning_rate": 2.531214528944382e-06, "loss": 3.6759, "step": 224 }, { "epoch": 0.07668711656441718, "grad_norm": 32.155300381648196, "learning_rate": 2.5425652667423384e-06, "loss": 3.7502, "step": 225 }, { "epoch": 0.07702794819359236, "grad_norm": 25.90320006372729, "learning_rate": 2.5539160045402955e-06, "loss": 3.1994, "step": 226 }, { "epoch": 0.07736877982276755, "grad_norm": 27.789835238756513, "learning_rate": 2.565266742338252e-06, "loss": 3.1151, "step": 227 }, { "epoch": 0.07770961145194274, "grad_norm": 37.09222234028481, "learning_rate": 2.576617480136209e-06, "loss": 3.797, "step": 228 }, { "epoch": 0.07805044308111793, "grad_norm": 30.702416550199718, "learning_rate": 2.587968217934166e-06, "loss": 3.7105, "step": 229 }, { "epoch": 0.07839127471029311, "grad_norm": 30.61150157817821, "learning_rate": 2.599318955732123e-06, "loss": 3.1572, "step": 230 }, { "epoch": 0.0787321063394683, "grad_norm": 35.66433784679161, "learning_rate": 2.6106696935300795e-06, "loss": 4.1919, "step": 231 }, { "epoch": 0.0790729379686435, "grad_norm": 21.043134553681316, "learning_rate": 2.6220204313280365e-06, "loss": 3.1608, "step": 232 }, { "epoch": 0.07941376959781868, "grad_norm": 40.947832982778664, "learning_rate": 2.633371169125993e-06, "loss": 3.8684, "step": 233 }, { "epoch": 0.07975460122699386, "grad_norm": 18.946432759798558, "learning_rate": 2.6447219069239506e-06, "loss": 2.4338, "step": 234 }, { "epoch": 0.08009543285616905, "grad_norm": 26.017493191175593, "learning_rate": 2.6560726447219073e-06, "loss": 3.3031, "step": 235 }, { "epoch": 0.08043626448534424, "grad_norm": 30.10315485251379, "learning_rate": 2.6674233825198643e-06, "loss": 3.3657, "step": 236 }, { "epoch": 0.08077709611451943, "grad_norm": 29.722828331000297, "learning_rate": 2.678774120317821e-06, "loss": 2.8644, "step": 237 }, { "epoch": 0.08111792774369461, "grad_norm": 19.31839303443129, "learning_rate": 2.690124858115778e-06, "loss": 3.3219, "step": 238 }, { "epoch": 0.0814587593728698, "grad_norm": 25.037875988561936, "learning_rate": 2.7014755959137347e-06, "loss": 3.4312, "step": 239 }, { "epoch": 0.081799591002045, "grad_norm": 34.8407242241867, "learning_rate": 2.7128263337116917e-06, "loss": 4.1811, "step": 240 }, { "epoch": 0.08214042263122018, "grad_norm": 31.61384821916224, "learning_rate": 2.7241770715096483e-06, "loss": 3.506, "step": 241 }, { "epoch": 0.08248125426039536, "grad_norm": 23.9847619206856, "learning_rate": 2.7355278093076054e-06, "loss": 3.4181, "step": 242 }, { "epoch": 0.08282208588957055, "grad_norm": 26.209208439183612, "learning_rate": 2.746878547105562e-06, "loss": 3.7077, "step": 243 }, { "epoch": 0.08316291751874574, "grad_norm": 29.23625380361444, "learning_rate": 2.758229284903519e-06, "loss": 3.7317, "step": 244 }, { "epoch": 0.08350374914792093, "grad_norm": 63.439444386741464, "learning_rate": 2.7695800227014757e-06, "loss": 3.8577, "step": 245 }, { "epoch": 0.08384458077709611, "grad_norm": 30.947168607779197, "learning_rate": 2.7809307604994328e-06, "loss": 3.2762, "step": 246 }, { "epoch": 0.0841854124062713, "grad_norm": 26.502783210103775, "learning_rate": 2.7922814982973894e-06, "loss": 3.3473, "step": 247 }, { "epoch": 0.0845262440354465, "grad_norm": 21.072478849628332, "learning_rate": 2.8036322360953465e-06, "loss": 3.4363, "step": 248 }, { "epoch": 0.08486707566462168, "grad_norm": 18.283665968486943, "learning_rate": 2.814982973893303e-06, "loss": 2.9151, "step": 249 }, { "epoch": 0.08520790729379686, "grad_norm": 24.223399567532702, "learning_rate": 2.8263337116912606e-06, "loss": 3.4902, "step": 250 }, { "epoch": 0.08554873892297205, "grad_norm": 25.63036338595163, "learning_rate": 2.8376844494892168e-06, "loss": 3.2724, "step": 251 }, { "epoch": 0.08588957055214724, "grad_norm": 24.978610060838108, "learning_rate": 2.8490351872871742e-06, "loss": 3.0956, "step": 252 }, { "epoch": 0.08623040218132243, "grad_norm": 29.73342604601439, "learning_rate": 2.860385925085131e-06, "loss": 3.3138, "step": 253 }, { "epoch": 0.08657123381049761, "grad_norm": 46.082948063934055, "learning_rate": 2.871736662883088e-06, "loss": 3.6935, "step": 254 }, { "epoch": 0.0869120654396728, "grad_norm": 29.69501560376505, "learning_rate": 2.8830874006810446e-06, "loss": 3.5614, "step": 255 }, { "epoch": 0.08725289706884799, "grad_norm": 30.306980462216288, "learning_rate": 2.8944381384790016e-06, "loss": 3.6208, "step": 256 }, { "epoch": 0.08759372869802318, "grad_norm": 21.886077062127463, "learning_rate": 2.9057888762769583e-06, "loss": 3.3761, "step": 257 }, { "epoch": 0.08793456032719836, "grad_norm": 43.409720943322334, "learning_rate": 2.9171396140749153e-06, "loss": 3.1294, "step": 258 }, { "epoch": 0.08827539195637356, "grad_norm": 26.17275320291272, "learning_rate": 2.928490351872872e-06, "loss": 3.3809, "step": 259 }, { "epoch": 0.08861622358554874, "grad_norm": 25.218953730232002, "learning_rate": 2.939841089670829e-06, "loss": 3.4094, "step": 260 }, { "epoch": 0.08895705521472393, "grad_norm": 31.847086127505893, "learning_rate": 2.9511918274687856e-06, "loss": 3.7041, "step": 261 }, { "epoch": 0.08929788684389911, "grad_norm": 26.17363855057902, "learning_rate": 2.9625425652667427e-06, "loss": 3.5187, "step": 262 }, { "epoch": 0.08963871847307431, "grad_norm": 40.81006293997907, "learning_rate": 2.9738933030646993e-06, "loss": 3.5497, "step": 263 }, { "epoch": 0.08997955010224949, "grad_norm": 16.210255368203537, "learning_rate": 2.9852440408626564e-06, "loss": 3.1234, "step": 264 }, { "epoch": 0.09032038173142468, "grad_norm": 32.31431679645391, "learning_rate": 2.996594778660613e-06, "loss": 3.9211, "step": 265 }, { "epoch": 0.09066121336059986, "grad_norm": 52.38485761652781, "learning_rate": 3.00794551645857e-06, "loss": 3.1208, "step": 266 }, { "epoch": 0.09100204498977506, "grad_norm": 25.842788550648024, "learning_rate": 3.0192962542565267e-06, "loss": 3.1373, "step": 267 }, { "epoch": 0.09134287661895024, "grad_norm": 26.47039528163675, "learning_rate": 3.030646992054484e-06, "loss": 3.2163, "step": 268 }, { "epoch": 0.09168370824812543, "grad_norm": 49.89578481927084, "learning_rate": 3.0419977298524404e-06, "loss": 3.4747, "step": 269 }, { "epoch": 0.09202453987730061, "grad_norm": 26.78144936364749, "learning_rate": 3.053348467650398e-06, "loss": 3.1215, "step": 270 }, { "epoch": 0.09236537150647581, "grad_norm": 26.205871283433847, "learning_rate": 3.0646992054483545e-06, "loss": 3.479, "step": 271 }, { "epoch": 0.09270620313565099, "grad_norm": 20.426963660385688, "learning_rate": 3.0760499432463115e-06, "loss": 2.7827, "step": 272 }, { "epoch": 0.09304703476482618, "grad_norm": 20.55255174066778, "learning_rate": 3.087400681044268e-06, "loss": 3.1502, "step": 273 }, { "epoch": 0.09338786639400136, "grad_norm": 22.762808579770258, "learning_rate": 3.0987514188422252e-06, "loss": 3.4616, "step": 274 }, { "epoch": 0.09372869802317656, "grad_norm": 23.503165311579803, "learning_rate": 3.110102156640182e-06, "loss": 3.5193, "step": 275 }, { "epoch": 0.09406952965235174, "grad_norm": 24.717522772008035, "learning_rate": 3.121452894438139e-06, "loss": 3.3335, "step": 276 }, { "epoch": 0.09441036128152692, "grad_norm": 28.393887039982605, "learning_rate": 3.1328036322360955e-06, "loss": 3.1796, "step": 277 }, { "epoch": 0.09475119291070211, "grad_norm": 23.759519226531328, "learning_rate": 3.1441543700340526e-06, "loss": 3.258, "step": 278 }, { "epoch": 0.0950920245398773, "grad_norm": 18.610174926461337, "learning_rate": 3.1555051078320092e-06, "loss": 3.4963, "step": 279 }, { "epoch": 0.09543285616905249, "grad_norm": 25.30913908219719, "learning_rate": 3.1668558456299663e-06, "loss": 3.4497, "step": 280 }, { "epoch": 0.09577368779822767, "grad_norm": 26.517445169273053, "learning_rate": 3.178206583427923e-06, "loss": 3.5015, "step": 281 }, { "epoch": 0.09611451942740286, "grad_norm": 39.83056712344248, "learning_rate": 3.18955732122588e-06, "loss": 4.0058, "step": 282 }, { "epoch": 0.09645535105657806, "grad_norm": 41.23501814081815, "learning_rate": 3.2009080590238366e-06, "loss": 4.1369, "step": 283 }, { "epoch": 0.09679618268575324, "grad_norm": 23.469824760761757, "learning_rate": 3.2122587968217937e-06, "loss": 3.1914, "step": 284 }, { "epoch": 0.09713701431492842, "grad_norm": 49.76956345485541, "learning_rate": 3.2236095346197503e-06, "loss": 4.1823, "step": 285 }, { "epoch": 0.09747784594410361, "grad_norm": 30.724558910248216, "learning_rate": 3.2349602724177078e-06, "loss": 3.3774, "step": 286 }, { "epoch": 0.0978186775732788, "grad_norm": 26.867356753870524, "learning_rate": 3.2463110102156644e-06, "loss": 3.6886, "step": 287 }, { "epoch": 0.09815950920245399, "grad_norm": 24.641571116943812, "learning_rate": 3.2576617480136215e-06, "loss": 3.0539, "step": 288 }, { "epoch": 0.09850034083162917, "grad_norm": 40.606643400049876, "learning_rate": 3.269012485811578e-06, "loss": 3.3282, "step": 289 }, { "epoch": 0.09884117246080436, "grad_norm": 44.81791437815206, "learning_rate": 3.280363223609535e-06, "loss": 3.8095, "step": 290 }, { "epoch": 0.09918200408997956, "grad_norm": 38.30550736945801, "learning_rate": 3.2917139614074918e-06, "loss": 3.7563, "step": 291 }, { "epoch": 0.09952283571915474, "grad_norm": 18.68954613253475, "learning_rate": 3.303064699205449e-06, "loss": 3.1461, "step": 292 }, { "epoch": 0.09986366734832992, "grad_norm": 29.090557427814645, "learning_rate": 3.3144154370034055e-06, "loss": 3.4867, "step": 293 }, { "epoch": 0.10020449897750511, "grad_norm": 26.14332965390013, "learning_rate": 3.3257661748013625e-06, "loss": 3.1441, "step": 294 }, { "epoch": 0.1005453306066803, "grad_norm": 30.914503124504886, "learning_rate": 3.337116912599319e-06, "loss": 3.0591, "step": 295 }, { "epoch": 0.10088616223585549, "grad_norm": 20.008122622679846, "learning_rate": 3.348467650397276e-06, "loss": 3.3649, "step": 296 }, { "epoch": 0.10122699386503067, "grad_norm": 26.092632920095117, "learning_rate": 3.359818388195233e-06, "loss": 3.5644, "step": 297 }, { "epoch": 0.10156782549420586, "grad_norm": 28.63212438558321, "learning_rate": 3.37116912599319e-06, "loss": 3.522, "step": 298 }, { "epoch": 0.10190865712338106, "grad_norm": 51.509821948389956, "learning_rate": 3.3825198637911465e-06, "loss": 3.9337, "step": 299 }, { "epoch": 0.10224948875255624, "grad_norm": 17.989704453454173, "learning_rate": 3.3938706015891036e-06, "loss": 2.9136, "step": 300 }, { "epoch": 0.10259032038173142, "grad_norm": 16.958469658198492, "learning_rate": 3.40522133938706e-06, "loss": 2.8021, "step": 301 }, { "epoch": 0.1029311520109066, "grad_norm": 37.13133445590453, "learning_rate": 3.4165720771850177e-06, "loss": 3.51, "step": 302 }, { "epoch": 0.1032719836400818, "grad_norm": 22.57628452179918, "learning_rate": 3.427922814982974e-06, "loss": 3.239, "step": 303 }, { "epoch": 0.10361281526925699, "grad_norm": 53.717734359321454, "learning_rate": 3.4392735527809314e-06, "loss": 3.9258, "step": 304 }, { "epoch": 0.10395364689843217, "grad_norm": 28.042921340328537, "learning_rate": 3.450624290578888e-06, "loss": 3.2924, "step": 305 }, { "epoch": 0.10429447852760736, "grad_norm": 15.701072697803744, "learning_rate": 3.461975028376845e-06, "loss": 3.0604, "step": 306 }, { "epoch": 0.10463531015678255, "grad_norm": 23.21846497763572, "learning_rate": 3.4733257661748017e-06, "loss": 3.2767, "step": 307 }, { "epoch": 0.10497614178595774, "grad_norm": 29.370133747684566, "learning_rate": 3.4846765039727587e-06, "loss": 3.2234, "step": 308 }, { "epoch": 0.10531697341513292, "grad_norm": 22.764078085214955, "learning_rate": 3.4960272417707154e-06, "loss": 3.3851, "step": 309 }, { "epoch": 0.1056578050443081, "grad_norm": 23.31570283622387, "learning_rate": 3.5073779795686724e-06, "loss": 3.4126, "step": 310 }, { "epoch": 0.1059986366734833, "grad_norm": 29.42933626565418, "learning_rate": 3.518728717366629e-06, "loss": 3.9545, "step": 311 }, { "epoch": 0.10633946830265849, "grad_norm": 24.436425513801332, "learning_rate": 3.530079455164586e-06, "loss": 3.4889, "step": 312 }, { "epoch": 0.10668029993183367, "grad_norm": 46.79413102742708, "learning_rate": 3.5414301929625428e-06, "loss": 3.4972, "step": 313 }, { "epoch": 0.10702113156100886, "grad_norm": 49.65149493219804, "learning_rate": 3.5527809307605e-06, "loss": 3.3471, "step": 314 }, { "epoch": 0.10736196319018405, "grad_norm": 30.65486345374183, "learning_rate": 3.5641316685584564e-06, "loss": 3.3228, "step": 315 }, { "epoch": 0.10770279481935924, "grad_norm": 39.70390547324314, "learning_rate": 3.5754824063564135e-06, "loss": 3.3986, "step": 316 }, { "epoch": 0.10804362644853442, "grad_norm": 25.14289397698772, "learning_rate": 3.58683314415437e-06, "loss": 3.536, "step": 317 }, { "epoch": 0.1083844580777096, "grad_norm": 26.268354776849872, "learning_rate": 3.598183881952327e-06, "loss": 3.8046, "step": 318 }, { "epoch": 0.1087252897068848, "grad_norm": 29.361490369722848, "learning_rate": 3.609534619750284e-06, "loss": 3.9811, "step": 319 }, { "epoch": 0.10906612133605999, "grad_norm": 21.63712113123274, "learning_rate": 3.6208853575482413e-06, "loss": 3.3854, "step": 320 }, { "epoch": 0.10940695296523517, "grad_norm": 39.485896913323444, "learning_rate": 3.6322360953461975e-06, "loss": 3.2964, "step": 321 }, { "epoch": 0.10974778459441036, "grad_norm": 48.29856764640093, "learning_rate": 3.643586833144155e-06, "loss": 2.8457, "step": 322 }, { "epoch": 0.11008861622358555, "grad_norm": 22.446917758269773, "learning_rate": 3.6549375709421116e-06, "loss": 3.2262, "step": 323 }, { "epoch": 0.11042944785276074, "grad_norm": 29.00727455870548, "learning_rate": 3.6662883087400687e-06, "loss": 2.9774, "step": 324 }, { "epoch": 0.11077027948193592, "grad_norm": 39.75232173436055, "learning_rate": 3.6776390465380253e-06, "loss": 3.2752, "step": 325 }, { "epoch": 0.1111111111111111, "grad_norm": 32.58609950538057, "learning_rate": 3.6889897843359824e-06, "loss": 3.4757, "step": 326 }, { "epoch": 0.1114519427402863, "grad_norm": 26.42898198287206, "learning_rate": 3.700340522133939e-06, "loss": 3.7568, "step": 327 }, { "epoch": 0.11179277436946149, "grad_norm": 44.117489953335216, "learning_rate": 3.711691259931896e-06, "loss": 3.8, "step": 328 }, { "epoch": 0.11213360599863667, "grad_norm": 28.855118482398545, "learning_rate": 3.7230419977298527e-06, "loss": 3.7875, "step": 329 }, { "epoch": 0.11247443762781185, "grad_norm": 25.188994445570888, "learning_rate": 3.7343927355278097e-06, "loss": 3.3821, "step": 330 }, { "epoch": 0.11281526925698705, "grad_norm": 23.229563979127278, "learning_rate": 3.7457434733257664e-06, "loss": 3.3874, "step": 331 }, { "epoch": 0.11315610088616224, "grad_norm": 32.61922999831858, "learning_rate": 3.7570942111237234e-06, "loss": 3.0305, "step": 332 }, { "epoch": 0.11349693251533742, "grad_norm": 33.943960940937544, "learning_rate": 3.76844494892168e-06, "loss": 3.3457, "step": 333 }, { "epoch": 0.1138377641445126, "grad_norm": 23.924585672170455, "learning_rate": 3.779795686719637e-06, "loss": 3.4819, "step": 334 }, { "epoch": 0.1141785957736878, "grad_norm": 16.096262308733213, "learning_rate": 3.7911464245175937e-06, "loss": 2.8664, "step": 335 }, { "epoch": 0.11451942740286299, "grad_norm": 25.72421495803069, "learning_rate": 3.8024971623155508e-06, "loss": 3.0988, "step": 336 }, { "epoch": 0.11486025903203817, "grad_norm": 24.834298977273658, "learning_rate": 3.8138479001135074e-06, "loss": 3.3421, "step": 337 }, { "epoch": 0.11520109066121335, "grad_norm": 24.341257151220084, "learning_rate": 3.8251986379114645e-06, "loss": 3.4394, "step": 338 }, { "epoch": 0.11554192229038855, "grad_norm": 32.284610474230924, "learning_rate": 3.836549375709421e-06, "loss": 3.2953, "step": 339 }, { "epoch": 0.11588275391956374, "grad_norm": 37.77876695563503, "learning_rate": 3.847900113507379e-06, "loss": 3.3744, "step": 340 }, { "epoch": 0.11622358554873892, "grad_norm": 24.832338706859655, "learning_rate": 3.859250851305335e-06, "loss": 3.7709, "step": 341 }, { "epoch": 0.1165644171779141, "grad_norm": 28.61531715327053, "learning_rate": 3.870601589103292e-06, "loss": 2.8732, "step": 342 }, { "epoch": 0.1169052488070893, "grad_norm": 31.007115637423407, "learning_rate": 3.8819523269012485e-06, "loss": 3.3029, "step": 343 }, { "epoch": 0.11724608043626449, "grad_norm": 35.68255577401891, "learning_rate": 3.893303064699206e-06, "loss": 3.7628, "step": 344 }, { "epoch": 0.11758691206543967, "grad_norm": 31.44230178958902, "learning_rate": 3.904653802497163e-06, "loss": 2.9968, "step": 345 }, { "epoch": 0.11792774369461487, "grad_norm": 27.364618915202037, "learning_rate": 3.91600454029512e-06, "loss": 2.9121, "step": 346 }, { "epoch": 0.11826857532379005, "grad_norm": 26.529262353922267, "learning_rate": 3.927355278093076e-06, "loss": 3.7887, "step": 347 }, { "epoch": 0.11860940695296524, "grad_norm": 21.79537082343792, "learning_rate": 3.938706015891033e-06, "loss": 3.0358, "step": 348 }, { "epoch": 0.11895023858214042, "grad_norm": 26.67844714442187, "learning_rate": 3.95005675368899e-06, "loss": 3.0775, "step": 349 }, { "epoch": 0.11929107021131562, "grad_norm": 20.21042511085461, "learning_rate": 3.9614074914869474e-06, "loss": 3.1757, "step": 350 }, { "epoch": 0.1196319018404908, "grad_norm": 28.634986822772976, "learning_rate": 3.972758229284904e-06, "loss": 3.0582, "step": 351 }, { "epoch": 0.11997273346966598, "grad_norm": 27.506678526895456, "learning_rate": 3.984108967082861e-06, "loss": 3.6261, "step": 352 }, { "epoch": 0.12031356509884117, "grad_norm": 26.231454159795575, "learning_rate": 3.995459704880817e-06, "loss": 3.3565, "step": 353 }, { "epoch": 0.12065439672801637, "grad_norm": 37.16585870374642, "learning_rate": 4.006810442678775e-06, "loss": 3.9849, "step": 354 }, { "epoch": 0.12099522835719155, "grad_norm": 47.26712457487932, "learning_rate": 4.0181611804767314e-06, "loss": 3.8256, "step": 355 }, { "epoch": 0.12133605998636673, "grad_norm": 29.3737643737477, "learning_rate": 4.029511918274688e-06, "loss": 3.6536, "step": 356 }, { "epoch": 0.12167689161554192, "grad_norm": 22.68092648195418, "learning_rate": 4.040862656072645e-06, "loss": 3.0232, "step": 357 }, { "epoch": 0.12201772324471712, "grad_norm": 21.384119550256347, "learning_rate": 4.052213393870602e-06, "loss": 3.2522, "step": 358 }, { "epoch": 0.1223585548738923, "grad_norm": 25.061308891750326, "learning_rate": 4.063564131668559e-06, "loss": 3.3553, "step": 359 }, { "epoch": 0.12269938650306748, "grad_norm": 37.347807645377095, "learning_rate": 4.0749148694665155e-06, "loss": 3.8297, "step": 360 }, { "epoch": 0.12304021813224267, "grad_norm": 37.61184907576129, "learning_rate": 4.086265607264472e-06, "loss": 3.1193, "step": 361 }, { "epoch": 0.12338104976141787, "grad_norm": 25.871013292402292, "learning_rate": 4.0976163450624296e-06, "loss": 3.3804, "step": 362 }, { "epoch": 0.12372188139059305, "grad_norm": 15.759794149377655, "learning_rate": 4.108967082860386e-06, "loss": 3.2964, "step": 363 }, { "epoch": 0.12406271301976823, "grad_norm": 28.208752219308547, "learning_rate": 4.120317820658344e-06, "loss": 3.5275, "step": 364 }, { "epoch": 0.12440354464894342, "grad_norm": 22.88555566293089, "learning_rate": 4.1316685584563e-06, "loss": 3.3824, "step": 365 }, { "epoch": 0.12474437627811862, "grad_norm": 23.371507076425747, "learning_rate": 4.143019296254257e-06, "loss": 3.3156, "step": 366 }, { "epoch": 0.1250852079072938, "grad_norm": 39.601470648223604, "learning_rate": 4.1543700340522136e-06, "loss": 3.4735, "step": 367 }, { "epoch": 0.125426039536469, "grad_norm": 43.084315254484984, "learning_rate": 4.165720771850171e-06, "loss": 3.4301, "step": 368 }, { "epoch": 0.12576687116564417, "grad_norm": 25.60388581382586, "learning_rate": 4.177071509648128e-06, "loss": 3.3273, "step": 369 }, { "epoch": 0.12610770279481937, "grad_norm": 19.273449561518348, "learning_rate": 4.188422247446084e-06, "loss": 3.2389, "step": 370 }, { "epoch": 0.12644853442399454, "grad_norm": 21.658112109470267, "learning_rate": 4.199772985244041e-06, "loss": 3.0686, "step": 371 }, { "epoch": 0.12678936605316973, "grad_norm": 39.350072375815394, "learning_rate": 4.211123723041998e-06, "loss": 3.7478, "step": 372 }, { "epoch": 0.12713019768234493, "grad_norm": 23.2680315384974, "learning_rate": 4.222474460839955e-06, "loss": 3.6035, "step": 373 }, { "epoch": 0.1274710293115201, "grad_norm": 21.861461330391762, "learning_rate": 4.233825198637912e-06, "loss": 3.7554, "step": 374 }, { "epoch": 0.1278118609406953, "grad_norm": 35.21458035767287, "learning_rate": 4.245175936435868e-06, "loss": 3.4164, "step": 375 }, { "epoch": 0.1281526925698705, "grad_norm": 22.400518372108994, "learning_rate": 4.256526674233826e-06, "loss": 2.9454, "step": 376 }, { "epoch": 0.12849352419904567, "grad_norm": 23.00724277264511, "learning_rate": 4.2678774120317824e-06, "loss": 3.412, "step": 377 }, { "epoch": 0.12883435582822086, "grad_norm": 58.01941704410782, "learning_rate": 4.279228149829739e-06, "loss": 4.0357, "step": 378 }, { "epoch": 0.12917518745739603, "grad_norm": 29.36389101275093, "learning_rate": 4.290578887627696e-06, "loss": 3.2109, "step": 379 }, { "epoch": 0.12951601908657123, "grad_norm": 24.386984826179006, "learning_rate": 4.301929625425653e-06, "loss": 3.5581, "step": 380 }, { "epoch": 0.12985685071574643, "grad_norm": 14.693266308882807, "learning_rate": 4.31328036322361e-06, "loss": 3.0419, "step": 381 }, { "epoch": 0.1301976823449216, "grad_norm": 26.30594134368428, "learning_rate": 4.324631101021567e-06, "loss": 3.4317, "step": 382 }, { "epoch": 0.1305385139740968, "grad_norm": 33.68306371776399, "learning_rate": 4.335981838819524e-06, "loss": 3.491, "step": 383 }, { "epoch": 0.130879345603272, "grad_norm": 33.75437605588583, "learning_rate": 4.3473325766174805e-06, "loss": 3.7256, "step": 384 }, { "epoch": 0.13122017723244717, "grad_norm": 41.01389346741316, "learning_rate": 4.358683314415437e-06, "loss": 3.8598, "step": 385 }, { "epoch": 0.13156100886162236, "grad_norm": 34.23819532149877, "learning_rate": 4.370034052213395e-06, "loss": 4.1207, "step": 386 }, { "epoch": 0.13190184049079753, "grad_norm": 27.61097850678461, "learning_rate": 4.381384790011351e-06, "loss": 2.8575, "step": 387 }, { "epoch": 0.13224267211997273, "grad_norm": 23.45228675076355, "learning_rate": 4.392735527809308e-06, "loss": 3.726, "step": 388 }, { "epoch": 0.13258350374914793, "grad_norm": 22.0667682218867, "learning_rate": 4.4040862656072645e-06, "loss": 3.1915, "step": 389 }, { "epoch": 0.1329243353783231, "grad_norm": 23.865899938088276, "learning_rate": 4.415437003405222e-06, "loss": 3.0718, "step": 390 }, { "epoch": 0.1332651670074983, "grad_norm": 28.23058669748859, "learning_rate": 4.426787741203179e-06, "loss": 3.2846, "step": 391 }, { "epoch": 0.1336059986366735, "grad_norm": 25.031098279397305, "learning_rate": 4.438138479001135e-06, "loss": 3.1893, "step": 392 }, { "epoch": 0.13394683026584867, "grad_norm": 50.6705506623126, "learning_rate": 4.449489216799092e-06, "loss": 3.2688, "step": 393 }, { "epoch": 0.13428766189502386, "grad_norm": 40.111075010992806, "learning_rate": 4.460839954597049e-06, "loss": 3.4855, "step": 394 }, { "epoch": 0.13462849352419903, "grad_norm": 25.88438425427351, "learning_rate": 4.472190692395006e-06, "loss": 3.4344, "step": 395 }, { "epoch": 0.13496932515337423, "grad_norm": 22.77529309840879, "learning_rate": 4.483541430192963e-06, "loss": 3.2855, "step": 396 }, { "epoch": 0.13531015678254943, "grad_norm": 27.03185132668022, "learning_rate": 4.494892167990919e-06, "loss": 3.2646, "step": 397 }, { "epoch": 0.1356509884117246, "grad_norm": 23.450874970517727, "learning_rate": 4.506242905788877e-06, "loss": 3.2668, "step": 398 }, { "epoch": 0.1359918200408998, "grad_norm": 24.529120168665212, "learning_rate": 4.517593643586833e-06, "loss": 3.3584, "step": 399 }, { "epoch": 0.136332651670075, "grad_norm": 24.211555751848802, "learning_rate": 4.528944381384791e-06, "loss": 3.1627, "step": 400 }, { "epoch": 0.13667348329925016, "grad_norm": 55.91516904658619, "learning_rate": 4.5402951191827475e-06, "loss": 3.5566, "step": 401 }, { "epoch": 0.13701431492842536, "grad_norm": 138.19376733662983, "learning_rate": 4.551645856980704e-06, "loss": 3.6277, "step": 402 }, { "epoch": 0.13735514655760053, "grad_norm": 27.820527345360514, "learning_rate": 4.562996594778661e-06, "loss": 3.637, "step": 403 }, { "epoch": 0.13769597818677573, "grad_norm": 21.24471506694553, "learning_rate": 4.574347332576618e-06, "loss": 2.969, "step": 404 }, { "epoch": 0.13803680981595093, "grad_norm": 30.292261750606603, "learning_rate": 4.585698070374575e-06, "loss": 3.5344, "step": 405 }, { "epoch": 0.1383776414451261, "grad_norm": 41.426628831148186, "learning_rate": 4.5970488081725315e-06, "loss": 3.3624, "step": 406 }, { "epoch": 0.1387184730743013, "grad_norm": 32.028039365339126, "learning_rate": 4.608399545970488e-06, "loss": 3.7061, "step": 407 }, { "epoch": 0.1390593047034765, "grad_norm": 24.50239499978448, "learning_rate": 4.619750283768446e-06, "loss": 3.3865, "step": 408 }, { "epoch": 0.13940013633265166, "grad_norm": 28.232554442795003, "learning_rate": 4.631101021566402e-06, "loss": 3.6223, "step": 409 }, { "epoch": 0.13974096796182686, "grad_norm": 22.960684497905188, "learning_rate": 4.642451759364359e-06, "loss": 3.3113, "step": 410 }, { "epoch": 0.14008179959100203, "grad_norm": 47.42477507605025, "learning_rate": 4.6538024971623155e-06, "loss": 3.1159, "step": 411 }, { "epoch": 0.14042263122017723, "grad_norm": 26.396505662440386, "learning_rate": 4.665153234960273e-06, "loss": 3.9023, "step": 412 }, { "epoch": 0.14076346284935243, "grad_norm": 17.65391775520735, "learning_rate": 4.67650397275823e-06, "loss": 2.8528, "step": 413 }, { "epoch": 0.1411042944785276, "grad_norm": 20.859295117894572, "learning_rate": 4.687854710556186e-06, "loss": 3.0974, "step": 414 }, { "epoch": 0.1414451261077028, "grad_norm": 22.226854348839577, "learning_rate": 4.699205448354143e-06, "loss": 3.5092, "step": 415 }, { "epoch": 0.141785957736878, "grad_norm": 28.930763959644306, "learning_rate": 4.7105561861521e-06, "loss": 3.2303, "step": 416 }, { "epoch": 0.14212678936605316, "grad_norm": 26.66244288975185, "learning_rate": 4.721906923950057e-06, "loss": 3.4155, "step": 417 }, { "epoch": 0.14246762099522836, "grad_norm": 29.22823274241426, "learning_rate": 4.7332576617480145e-06, "loss": 3.6289, "step": 418 }, { "epoch": 0.14280845262440353, "grad_norm": 37.3151455049413, "learning_rate": 4.744608399545971e-06, "loss": 3.8072, "step": 419 }, { "epoch": 0.14314928425357873, "grad_norm": 22.914006429614716, "learning_rate": 4.755959137343928e-06, "loss": 3.575, "step": 420 }, { "epoch": 0.14349011588275393, "grad_norm": 40.32178960374478, "learning_rate": 4.767309875141884e-06, "loss": 3.8046, "step": 421 }, { "epoch": 0.1438309475119291, "grad_norm": 32.34788563941903, "learning_rate": 4.778660612939842e-06, "loss": 2.5498, "step": 422 }, { "epoch": 0.1441717791411043, "grad_norm": 44.54710274991431, "learning_rate": 4.7900113507377985e-06, "loss": 3.2741, "step": 423 }, { "epoch": 0.1445126107702795, "grad_norm": 23.897792650030343, "learning_rate": 4.801362088535755e-06, "loss": 3.3159, "step": 424 }, { "epoch": 0.14485344239945466, "grad_norm": 16.501880410246084, "learning_rate": 4.812712826333712e-06, "loss": 3.2318, "step": 425 }, { "epoch": 0.14519427402862986, "grad_norm": 20.025380338093218, "learning_rate": 4.824063564131669e-06, "loss": 3.7572, "step": 426 }, { "epoch": 0.14553510565780503, "grad_norm": 32.837418179281165, "learning_rate": 4.835414301929626e-06, "loss": 4.0379, "step": 427 }, { "epoch": 0.14587593728698023, "grad_norm": 64.13756407890655, "learning_rate": 4.8467650397275825e-06, "loss": 3.3553, "step": 428 }, { "epoch": 0.14621676891615543, "grad_norm": 23.19828159290423, "learning_rate": 4.858115777525539e-06, "loss": 3.4679, "step": 429 }, { "epoch": 0.1465576005453306, "grad_norm": 28.680765735465517, "learning_rate": 4.869466515323497e-06, "loss": 3.9917, "step": 430 }, { "epoch": 0.1468984321745058, "grad_norm": 38.239247613809745, "learning_rate": 4.880817253121453e-06, "loss": 3.2472, "step": 431 }, { "epoch": 0.147239263803681, "grad_norm": 33.19618272455649, "learning_rate": 4.892167990919411e-06, "loss": 3.6932, "step": 432 }, { "epoch": 0.14758009543285616, "grad_norm": 19.12148709003621, "learning_rate": 4.9035187287173665e-06, "loss": 3.0767, "step": 433 }, { "epoch": 0.14792092706203136, "grad_norm": 30.19396243550386, "learning_rate": 4.914869466515324e-06, "loss": 3.7404, "step": 434 }, { "epoch": 0.14826175869120656, "grad_norm": 25.728042791737526, "learning_rate": 4.926220204313281e-06, "loss": 3.4838, "step": 435 }, { "epoch": 0.14860259032038173, "grad_norm": 24.68965124795127, "learning_rate": 4.937570942111238e-06, "loss": 3.7406, "step": 436 }, { "epoch": 0.14894342194955693, "grad_norm": 18.52073798040104, "learning_rate": 4.948921679909195e-06, "loss": 3.2825, "step": 437 }, { "epoch": 0.1492842535787321, "grad_norm": 19.87282104858766, "learning_rate": 4.960272417707151e-06, "loss": 3.4925, "step": 438 }, { "epoch": 0.1496250852079073, "grad_norm": 20.312571415307406, "learning_rate": 4.971623155505108e-06, "loss": 3.4295, "step": 439 }, { "epoch": 0.1499659168370825, "grad_norm": 27.808130443588837, "learning_rate": 4.9829738933030655e-06, "loss": 3.0021, "step": 440 }, { "epoch": 0.15030674846625766, "grad_norm": 23.175410101640328, "learning_rate": 4.994324631101022e-06, "loss": 3.3455, "step": 441 }, { "epoch": 0.15064758009543286, "grad_norm": 23.629373808760466, "learning_rate": 5.005675368898979e-06, "loss": 3.1739, "step": 442 }, { "epoch": 0.15098841172460806, "grad_norm": 29.6399617419849, "learning_rate": 5.017026106696936e-06, "loss": 3.1252, "step": 443 }, { "epoch": 0.15132924335378323, "grad_norm": 21.172144751351954, "learning_rate": 5.028376844494892e-06, "loss": 3.3184, "step": 444 }, { "epoch": 0.15167007498295842, "grad_norm": 23.057357184728996, "learning_rate": 5.0397275822928495e-06, "loss": 3.5939, "step": 445 }, { "epoch": 0.1520109066121336, "grad_norm": 27.113252748657676, "learning_rate": 5.051078320090806e-06, "loss": 3.1781, "step": 446 }, { "epoch": 0.1523517382413088, "grad_norm": 36.367359236269095, "learning_rate": 5.062429057888764e-06, "loss": 2.9693, "step": 447 }, { "epoch": 0.152692569870484, "grad_norm": 25.30322545526685, "learning_rate": 5.073779795686719e-06, "loss": 3.2579, "step": 448 }, { "epoch": 0.15303340149965916, "grad_norm": 19.654449748345346, "learning_rate": 5.085130533484677e-06, "loss": 3.4974, "step": 449 }, { "epoch": 0.15337423312883436, "grad_norm": 43.26822056020597, "learning_rate": 5.096481271282634e-06, "loss": 3.2836, "step": 450 }, { "epoch": 0.15371506475800956, "grad_norm": 24.330062143852118, "learning_rate": 5.107832009080591e-06, "loss": 3.2408, "step": 451 }, { "epoch": 0.15405589638718473, "grad_norm": 19.133937994585818, "learning_rate": 5.119182746878547e-06, "loss": 3.0242, "step": 452 }, { "epoch": 0.15439672801635992, "grad_norm": 48.633685665009445, "learning_rate": 5.130533484676504e-06, "loss": 3.0877, "step": 453 }, { "epoch": 0.1547375596455351, "grad_norm": 26.514135494712963, "learning_rate": 5.141884222474462e-06, "loss": 3.0649, "step": 454 }, { "epoch": 0.1550783912747103, "grad_norm": 20.996290329763468, "learning_rate": 5.153234960272418e-06, "loss": 3.0978, "step": 455 }, { "epoch": 0.1554192229038855, "grad_norm": 61.16073139055613, "learning_rate": 5.164585698070375e-06, "loss": 3.8337, "step": 456 }, { "epoch": 0.15576005453306066, "grad_norm": 27.475616250411083, "learning_rate": 5.175936435868332e-06, "loss": 3.3255, "step": 457 }, { "epoch": 0.15610088616223586, "grad_norm": 24.15334960963796, "learning_rate": 5.187287173666289e-06, "loss": 2.8465, "step": 458 }, { "epoch": 0.15644171779141106, "grad_norm": 18.78017018473755, "learning_rate": 5.198637911464246e-06, "loss": 2.992, "step": 459 }, { "epoch": 0.15678254942058623, "grad_norm": 27.93688557776103, "learning_rate": 5.209988649262202e-06, "loss": 2.8597, "step": 460 }, { "epoch": 0.15712338104976142, "grad_norm": 27.112359502110564, "learning_rate": 5.221339387060159e-06, "loss": 3.4835, "step": 461 }, { "epoch": 0.1574642126789366, "grad_norm": 29.72278679250819, "learning_rate": 5.2326901248581164e-06, "loss": 3.5909, "step": 462 }, { "epoch": 0.1578050443081118, "grad_norm": 21.40637610650895, "learning_rate": 5.244040862656073e-06, "loss": 3.4594, "step": 463 }, { "epoch": 0.158145875937287, "grad_norm": 61.602496668607145, "learning_rate": 5.25539160045403e-06, "loss": 3.3699, "step": 464 }, { "epoch": 0.15848670756646216, "grad_norm": 28.840009983672616, "learning_rate": 5.266742338251986e-06, "loss": 3.2751, "step": 465 }, { "epoch": 0.15882753919563736, "grad_norm": 26.81365738265123, "learning_rate": 5.278093076049944e-06, "loss": 3.3301, "step": 466 }, { "epoch": 0.15916837082481256, "grad_norm": 27.925863765473796, "learning_rate": 5.289443813847901e-06, "loss": 3.4233, "step": 467 }, { "epoch": 0.15950920245398773, "grad_norm": 47.48414190894158, "learning_rate": 5.300794551645857e-06, "loss": 3.4316, "step": 468 }, { "epoch": 0.15985003408316292, "grad_norm": 70.64816157414505, "learning_rate": 5.3121452894438146e-06, "loss": 3.5034, "step": 469 }, { "epoch": 0.1601908657123381, "grad_norm": 20.114132320428418, "learning_rate": 5.323496027241771e-06, "loss": 3.1561, "step": 470 }, { "epoch": 0.1605316973415133, "grad_norm": 34.28986509374082, "learning_rate": 5.334846765039729e-06, "loss": 2.7342, "step": 471 }, { "epoch": 0.1608725289706885, "grad_norm": 27.39827876146415, "learning_rate": 5.3461975028376844e-06, "loss": 3.907, "step": 472 }, { "epoch": 0.16121336059986366, "grad_norm": 30.980595751698406, "learning_rate": 5.357548240635642e-06, "loss": 3.1856, "step": 473 }, { "epoch": 0.16155419222903886, "grad_norm": 20.34585793915918, "learning_rate": 5.3688989784335986e-06, "loss": 3.2871, "step": 474 }, { "epoch": 0.16189502385821405, "grad_norm": 29.613228431912475, "learning_rate": 5.380249716231556e-06, "loss": 3.464, "step": 475 }, { "epoch": 0.16223585548738922, "grad_norm": 16.529127002039914, "learning_rate": 5.391600454029512e-06, "loss": 3.372, "step": 476 }, { "epoch": 0.16257668711656442, "grad_norm": 25.36139656186472, "learning_rate": 5.402951191827469e-06, "loss": 2.934, "step": 477 }, { "epoch": 0.1629175187457396, "grad_norm": 32.8902531556671, "learning_rate": 5.414301929625426e-06, "loss": 3.6385, "step": 478 }, { "epoch": 0.1632583503749148, "grad_norm": 19.83815607578902, "learning_rate": 5.425652667423383e-06, "loss": 2.9722, "step": 479 }, { "epoch": 0.16359918200409, "grad_norm": 43.49204282546653, "learning_rate": 5.437003405221339e-06, "loss": 3.7345, "step": 480 }, { "epoch": 0.16394001363326516, "grad_norm": 37.19314324987305, "learning_rate": 5.448354143019297e-06, "loss": 3.5872, "step": 481 }, { "epoch": 0.16428084526244036, "grad_norm": 24.74836508973546, "learning_rate": 5.459704880817253e-06, "loss": 3.5791, "step": 482 }, { "epoch": 0.16462167689161555, "grad_norm": 21.958278571643792, "learning_rate": 5.471055618615211e-06, "loss": 3.1751, "step": 483 }, { "epoch": 0.16496250852079072, "grad_norm": 36.33409307960184, "learning_rate": 5.4824063564131666e-06, "loss": 3.2012, "step": 484 }, { "epoch": 0.16530334014996592, "grad_norm": 32.96927940111071, "learning_rate": 5.493757094211124e-06, "loss": 3.3522, "step": 485 }, { "epoch": 0.1656441717791411, "grad_norm": 25.06118256145394, "learning_rate": 5.5051078320090815e-06, "loss": 3.7034, "step": 486 }, { "epoch": 0.1659850034083163, "grad_norm": 22.543937631502462, "learning_rate": 5.516458569807038e-06, "loss": 3.4567, "step": 487 }, { "epoch": 0.1663258350374915, "grad_norm": 52.51359285146997, "learning_rate": 5.527809307604994e-06, "loss": 3.9333, "step": 488 }, { "epoch": 0.16666666666666666, "grad_norm": 26.021244247952673, "learning_rate": 5.539160045402951e-06, "loss": 3.5291, "step": 489 }, { "epoch": 0.16700749829584186, "grad_norm": 26.653321198038487, "learning_rate": 5.550510783200909e-06, "loss": 3.4493, "step": 490 }, { "epoch": 0.16734832992501705, "grad_norm": 29.705759265257434, "learning_rate": 5.5618615209988655e-06, "loss": 3.2509, "step": 491 }, { "epoch": 0.16768916155419222, "grad_norm": 23.62812249973601, "learning_rate": 5.573212258796822e-06, "loss": 2.8018, "step": 492 }, { "epoch": 0.16802999318336742, "grad_norm": 21.88425871582131, "learning_rate": 5.584562996594779e-06, "loss": 3.1126, "step": 493 }, { "epoch": 0.1683708248125426, "grad_norm": 24.546216850289547, "learning_rate": 5.595913734392736e-06, "loss": 2.7836, "step": 494 }, { "epoch": 0.1687116564417178, "grad_norm": 34.82866566566624, "learning_rate": 5.607264472190693e-06, "loss": 3.9109, "step": 495 }, { "epoch": 0.169052488070893, "grad_norm": 29.68630132767053, "learning_rate": 5.6186152099886495e-06, "loss": 3.282, "step": 496 }, { "epoch": 0.16939331970006816, "grad_norm": 36.31462653618816, "learning_rate": 5.629965947786606e-06, "loss": 3.0522, "step": 497 }, { "epoch": 0.16973415132924335, "grad_norm": 38.29860915861816, "learning_rate": 5.641316685584564e-06, "loss": 3.3108, "step": 498 }, { "epoch": 0.17007498295841855, "grad_norm": 20.82118546458167, "learning_rate": 5.652667423382521e-06, "loss": 3.0351, "step": 499 }, { "epoch": 0.17041581458759372, "grad_norm": 34.82508193823718, "learning_rate": 5.664018161180477e-06, "loss": 3.3756, "step": 500 }, { "epoch": 0.17075664621676892, "grad_norm": 23.454176736152267, "learning_rate": 5.6753688989784335e-06, "loss": 3.0129, "step": 501 }, { "epoch": 0.1710974778459441, "grad_norm": 30.301950651779755, "learning_rate": 5.686719636776391e-06, "loss": 3.9632, "step": 502 }, { "epoch": 0.1714383094751193, "grad_norm": 21.07094418329733, "learning_rate": 5.6980703745743485e-06, "loss": 3.1781, "step": 503 }, { "epoch": 0.17177914110429449, "grad_norm": 21.8762824271937, "learning_rate": 5.709421112372304e-06, "loss": 3.1337, "step": 504 }, { "epoch": 0.17211997273346966, "grad_norm": 21.282604205711603, "learning_rate": 5.720771850170262e-06, "loss": 3.0701, "step": 505 }, { "epoch": 0.17246080436264485, "grad_norm": 30.424910670065746, "learning_rate": 5.732122587968218e-06, "loss": 2.9431, "step": 506 }, { "epoch": 0.17280163599182005, "grad_norm": 30.62379258718881, "learning_rate": 5.743473325766176e-06, "loss": 3.6791, "step": 507 }, { "epoch": 0.17314246762099522, "grad_norm": 21.003050551550963, "learning_rate": 5.754824063564132e-06, "loss": 3.4311, "step": 508 }, { "epoch": 0.17348329925017042, "grad_norm": 60.192278779691044, "learning_rate": 5.766174801362089e-06, "loss": 3.8561, "step": 509 }, { "epoch": 0.1738241308793456, "grad_norm": 35.40379264262327, "learning_rate": 5.777525539160046e-06, "loss": 2.9516, "step": 510 }, { "epoch": 0.1741649625085208, "grad_norm": 18.36607888401351, "learning_rate": 5.788876276958003e-06, "loss": 2.714, "step": 511 }, { "epoch": 0.17450579413769599, "grad_norm": 21.387287581520276, "learning_rate": 5.800227014755959e-06, "loss": 3.139, "step": 512 }, { "epoch": 0.17484662576687116, "grad_norm": 21.61281850539856, "learning_rate": 5.8115777525539165e-06, "loss": 3.0792, "step": 513 }, { "epoch": 0.17518745739604635, "grad_norm": 21.06866955887101, "learning_rate": 5.822928490351873e-06, "loss": 3.354, "step": 514 }, { "epoch": 0.17552828902522155, "grad_norm": 22.685552267589994, "learning_rate": 5.834279228149831e-06, "loss": 2.9687, "step": 515 }, { "epoch": 0.17586912065439672, "grad_norm": 59.43098420090411, "learning_rate": 5.845629965947786e-06, "loss": 3.3576, "step": 516 }, { "epoch": 0.17620995228357192, "grad_norm": 35.530390004546845, "learning_rate": 5.856980703745744e-06, "loss": 3.2322, "step": 517 }, { "epoch": 0.17655078391274712, "grad_norm": 29.052100138777856, "learning_rate": 5.8683314415437005e-06, "loss": 3.7539, "step": 518 }, { "epoch": 0.1768916155419223, "grad_norm": 24.715777197638086, "learning_rate": 5.879682179341658e-06, "loss": 3.2554, "step": 519 }, { "epoch": 0.17723244717109748, "grad_norm": 25.946668369183694, "learning_rate": 5.891032917139614e-06, "loss": 3.4278, "step": 520 }, { "epoch": 0.17757327880027265, "grad_norm": 27.789637451281425, "learning_rate": 5.902383654937571e-06, "loss": 3.0673, "step": 521 }, { "epoch": 0.17791411042944785, "grad_norm": 19.579369224944006, "learning_rate": 5.913734392735529e-06, "loss": 3.4305, "step": 522 }, { "epoch": 0.17825494205862305, "grad_norm": 22.39483266932541, "learning_rate": 5.925085130533485e-06, "loss": 3.0534, "step": 523 }, { "epoch": 0.17859577368779822, "grad_norm": 44.237088149564514, "learning_rate": 5.936435868331442e-06, "loss": 3.0159, "step": 524 }, { "epoch": 0.17893660531697342, "grad_norm": 26.568403074077814, "learning_rate": 5.947786606129399e-06, "loss": 3.1667, "step": 525 }, { "epoch": 0.17927743694614862, "grad_norm": 25.173552082789396, "learning_rate": 5.959137343927356e-06, "loss": 3.4813, "step": 526 }, { "epoch": 0.1796182685753238, "grad_norm": 32.56248490034913, "learning_rate": 5.970488081725313e-06, "loss": 2.9497, "step": 527 }, { "epoch": 0.17995910020449898, "grad_norm": 23.249643200902003, "learning_rate": 5.981838819523269e-06, "loss": 3.5269, "step": 528 }, { "epoch": 0.18029993183367415, "grad_norm": 27.76675467274532, "learning_rate": 5.993189557321226e-06, "loss": 3.7243, "step": 529 }, { "epoch": 0.18064076346284935, "grad_norm": 69.52770018289618, "learning_rate": 6.0045402951191835e-06, "loss": 3.2684, "step": 530 }, { "epoch": 0.18098159509202455, "grad_norm": 48.14771226964696, "learning_rate": 6.01589103291714e-06, "loss": 3.1029, "step": 531 }, { "epoch": 0.18132242672119972, "grad_norm": 23.529496028409927, "learning_rate": 6.027241770715097e-06, "loss": 3.4752, "step": 532 }, { "epoch": 0.18166325835037492, "grad_norm": 37.56672172284196, "learning_rate": 6.038592508513053e-06, "loss": 3.5109, "step": 533 }, { "epoch": 0.18200408997955012, "grad_norm": 23.51891776769473, "learning_rate": 6.049943246311011e-06, "loss": 2.8566, "step": 534 }, { "epoch": 0.18234492160872529, "grad_norm": 23.582532803143764, "learning_rate": 6.061293984108968e-06, "loss": 3.4881, "step": 535 }, { "epoch": 0.18268575323790048, "grad_norm": 20.799796234302473, "learning_rate": 6.072644721906924e-06, "loss": 3.5924, "step": 536 }, { "epoch": 0.18302658486707565, "grad_norm": 19.19297848708376, "learning_rate": 6.083995459704881e-06, "loss": 3.5124, "step": 537 }, { "epoch": 0.18336741649625085, "grad_norm": 27.00735853185107, "learning_rate": 6.095346197502838e-06, "loss": 3.3367, "step": 538 }, { "epoch": 0.18370824812542605, "grad_norm": 21.534487939573243, "learning_rate": 6.106696935300796e-06, "loss": 3.1773, "step": 539 }, { "epoch": 0.18404907975460122, "grad_norm": 22.180992387010704, "learning_rate": 6.1180476730987515e-06, "loss": 3.1487, "step": 540 }, { "epoch": 0.18438991138377642, "grad_norm": 26.55602329672257, "learning_rate": 6.129398410896709e-06, "loss": 3.3879, "step": 541 }, { "epoch": 0.18473074301295161, "grad_norm": 18.020067503370125, "learning_rate": 6.140749148694666e-06, "loss": 3.2404, "step": 542 }, { "epoch": 0.18507157464212678, "grad_norm": 29.05586953676158, "learning_rate": 6.152099886492623e-06, "loss": 3.3288, "step": 543 }, { "epoch": 0.18541240627130198, "grad_norm": 23.348934970482244, "learning_rate": 6.163450624290579e-06, "loss": 3.2509, "step": 544 }, { "epoch": 0.18575323790047715, "grad_norm": 22.320472879988202, "learning_rate": 6.174801362088536e-06, "loss": 3.2028, "step": 545 }, { "epoch": 0.18609406952965235, "grad_norm": 32.254979285161916, "learning_rate": 6.186152099886493e-06, "loss": 3.8129, "step": 546 }, { "epoch": 0.18643490115882755, "grad_norm": 25.641025759100756, "learning_rate": 6.1975028376844505e-06, "loss": 3.6254, "step": 547 }, { "epoch": 0.18677573278800272, "grad_norm": 31.149406715257253, "learning_rate": 6.208853575482406e-06, "loss": 3.3846, "step": 548 }, { "epoch": 0.18711656441717792, "grad_norm": 65.36951879493435, "learning_rate": 6.220204313280364e-06, "loss": 2.8993, "step": 549 }, { "epoch": 0.18745739604635311, "grad_norm": 25.3019528954931, "learning_rate": 6.23155505107832e-06, "loss": 3.624, "step": 550 }, { "epoch": 0.18779822767552828, "grad_norm": 29.21740242879814, "learning_rate": 6.242905788876278e-06, "loss": 3.8837, "step": 551 }, { "epoch": 0.18813905930470348, "grad_norm": 32.63382112057123, "learning_rate": 6.254256526674234e-06, "loss": 3.5399, "step": 552 }, { "epoch": 0.18847989093387865, "grad_norm": 69.057475658061, "learning_rate": 6.265607264472191e-06, "loss": 3.7291, "step": 553 }, { "epoch": 0.18882072256305385, "grad_norm": 23.130018847626225, "learning_rate": 6.2769580022701486e-06, "loss": 3.2533, "step": 554 }, { "epoch": 0.18916155419222905, "grad_norm": 22.99519857056165, "learning_rate": 6.288308740068105e-06, "loss": 3.1907, "step": 555 }, { "epoch": 0.18950238582140422, "grad_norm": 27.710510992977078, "learning_rate": 6.299659477866061e-06, "loss": 3.5782, "step": 556 }, { "epoch": 0.18984321745057942, "grad_norm": 29.093663289144295, "learning_rate": 6.3110102156640185e-06, "loss": 3.2991, "step": 557 }, { "epoch": 0.1901840490797546, "grad_norm": 32.320285271160294, "learning_rate": 6.322360953461976e-06, "loss": 3.5297, "step": 558 }, { "epoch": 0.19052488070892978, "grad_norm": 25.98250722744346, "learning_rate": 6.3337116912599326e-06, "loss": 2.7344, "step": 559 }, { "epoch": 0.19086571233810498, "grad_norm": 28.863510743642383, "learning_rate": 6.345062429057889e-06, "loss": 3.8643, "step": 560 }, { "epoch": 0.19120654396728015, "grad_norm": 26.569021530505545, "learning_rate": 6.356413166855846e-06, "loss": 3.7556, "step": 561 }, { "epoch": 0.19154737559645535, "grad_norm": 35.03092170777891, "learning_rate": 6.367763904653803e-06, "loss": 3.9044, "step": 562 }, { "epoch": 0.19188820722563055, "grad_norm": 26.059202918954323, "learning_rate": 6.37911464245176e-06, "loss": 3.2767, "step": 563 }, { "epoch": 0.19222903885480572, "grad_norm": 31.5134549581871, "learning_rate": 6.390465380249717e-06, "loss": 3.6411, "step": 564 }, { "epoch": 0.19256987048398091, "grad_norm": 26.877243467440774, "learning_rate": 6.401816118047673e-06, "loss": 3.3555, "step": 565 }, { "epoch": 0.1929107021131561, "grad_norm": 31.846154366195382, "learning_rate": 6.413166855845631e-06, "loss": 4.0248, "step": 566 }, { "epoch": 0.19325153374233128, "grad_norm": 18.616499143339563, "learning_rate": 6.424517593643587e-06, "loss": 2.933, "step": 567 }, { "epoch": 0.19359236537150648, "grad_norm": 23.093805714330088, "learning_rate": 6.435868331441544e-06, "loss": 3.281, "step": 568 }, { "epoch": 0.19393319700068165, "grad_norm": 30.83559569423827, "learning_rate": 6.447219069239501e-06, "loss": 3.634, "step": 569 }, { "epoch": 0.19427402862985685, "grad_norm": 32.73706036350775, "learning_rate": 6.458569807037458e-06, "loss": 3.4743, "step": 570 }, { "epoch": 0.19461486025903205, "grad_norm": 74.24147204735313, "learning_rate": 6.4699205448354155e-06, "loss": 3.7428, "step": 571 }, { "epoch": 0.19495569188820722, "grad_norm": 52.37636479029868, "learning_rate": 6.481271282633371e-06, "loss": 3.8128, "step": 572 }, { "epoch": 0.19529652351738241, "grad_norm": 29.32971293970952, "learning_rate": 6.492622020431329e-06, "loss": 3.694, "step": 573 }, { "epoch": 0.1956373551465576, "grad_norm": 53.82014887024488, "learning_rate": 6.5039727582292854e-06, "loss": 3.7754, "step": 574 }, { "epoch": 0.19597818677573278, "grad_norm": 28.850003121925102, "learning_rate": 6.515323496027243e-06, "loss": 2.7847, "step": 575 }, { "epoch": 0.19631901840490798, "grad_norm": 22.637784128737593, "learning_rate": 6.526674233825199e-06, "loss": 3.358, "step": 576 }, { "epoch": 0.19665985003408315, "grad_norm": 35.61107167179696, "learning_rate": 6.538024971623156e-06, "loss": 3.2443, "step": 577 }, { "epoch": 0.19700068166325835, "grad_norm": 31.80317806655503, "learning_rate": 6.549375709421113e-06, "loss": 4.0864, "step": 578 }, { "epoch": 0.19734151329243355, "grad_norm": 56.91706624954299, "learning_rate": 6.56072644721907e-06, "loss": 3.4392, "step": 579 }, { "epoch": 0.19768234492160872, "grad_norm": 23.213226100015838, "learning_rate": 6.572077185017026e-06, "loss": 3.5135, "step": 580 }, { "epoch": 0.1980231765507839, "grad_norm": 24.026197653791765, "learning_rate": 6.5834279228149836e-06, "loss": 3.3591, "step": 581 }, { "epoch": 0.1983640081799591, "grad_norm": 18.62673726649923, "learning_rate": 6.59477866061294e-06, "loss": 3.3976, "step": 582 }, { "epoch": 0.19870483980913428, "grad_norm": 22.60551870479549, "learning_rate": 6.606129398410898e-06, "loss": 3.3827, "step": 583 }, { "epoch": 0.19904567143830948, "grad_norm": 18.652295961874174, "learning_rate": 6.6174801362088534e-06, "loss": 3.4108, "step": 584 }, { "epoch": 0.19938650306748465, "grad_norm": 41.949281859304556, "learning_rate": 6.628830874006811e-06, "loss": 3.76, "step": 585 }, { "epoch": 0.19972733469665985, "grad_norm": 39.01023364768664, "learning_rate": 6.6401816118047676e-06, "loss": 2.827, "step": 586 }, { "epoch": 0.20006816632583505, "grad_norm": 20.878516587572626, "learning_rate": 6.651532349602725e-06, "loss": 2.9216, "step": 587 }, { "epoch": 0.20040899795501022, "grad_norm": 21.146966529246615, "learning_rate": 6.662883087400681e-06, "loss": 3.6306, "step": 588 }, { "epoch": 0.2007498295841854, "grad_norm": 24.592433533071503, "learning_rate": 6.674233825198638e-06, "loss": 3.3781, "step": 589 }, { "epoch": 0.2010906612133606, "grad_norm": 24.98703287628922, "learning_rate": 6.685584562996596e-06, "loss": 3.2197, "step": 590 }, { "epoch": 0.20143149284253578, "grad_norm": 16.002604694344356, "learning_rate": 6.696935300794552e-06, "loss": 3.542, "step": 591 }, { "epoch": 0.20177232447171098, "grad_norm": 20.538071312675253, "learning_rate": 6.708286038592508e-06, "loss": 2.7127, "step": 592 }, { "epoch": 0.20211315610088615, "grad_norm": 20.030323124806916, "learning_rate": 6.719636776390466e-06, "loss": 3.1837, "step": 593 }, { "epoch": 0.20245398773006135, "grad_norm": 32.797764482647544, "learning_rate": 6.730987514188423e-06, "loss": 3.8178, "step": 594 }, { "epoch": 0.20279481935923654, "grad_norm": 22.904398668139148, "learning_rate": 6.74233825198638e-06, "loss": 3.4008, "step": 595 }, { "epoch": 0.20313565098841171, "grad_norm": 19.788930144434676, "learning_rate": 6.753688989784336e-06, "loss": 2.7548, "step": 596 }, { "epoch": 0.2034764826175869, "grad_norm": 31.324709717321397, "learning_rate": 6.765039727582293e-06, "loss": 3.23, "step": 597 }, { "epoch": 0.2038173142467621, "grad_norm": 22.780068383723776, "learning_rate": 6.7763904653802505e-06, "loss": 3.0165, "step": 598 }, { "epoch": 0.20415814587593728, "grad_norm": 29.15127501460754, "learning_rate": 6.787741203178207e-06, "loss": 3.2529, "step": 599 }, { "epoch": 0.20449897750511248, "grad_norm": 17.40007686762209, "learning_rate": 6.799091940976164e-06, "loss": 2.9501, "step": 600 }, { "epoch": 0.20483980913428765, "grad_norm": 35.4553165212774, "learning_rate": 6.81044267877412e-06, "loss": 3.2171, "step": 601 }, { "epoch": 0.20518064076346285, "grad_norm": 19.103009345198597, "learning_rate": 6.821793416572078e-06, "loss": 3.5418, "step": 602 }, { "epoch": 0.20552147239263804, "grad_norm": 16.255161420164146, "learning_rate": 6.833144154370035e-06, "loss": 2.8603, "step": 603 }, { "epoch": 0.2058623040218132, "grad_norm": 28.72726768967839, "learning_rate": 6.844494892167991e-06, "loss": 3.3756, "step": 604 }, { "epoch": 0.2062031356509884, "grad_norm": 24.086255412194006, "learning_rate": 6.855845629965948e-06, "loss": 2.87, "step": 605 }, { "epoch": 0.2065439672801636, "grad_norm": 22.93265280345955, "learning_rate": 6.867196367763905e-06, "loss": 3.0186, "step": 606 }, { "epoch": 0.20688479890933878, "grad_norm": 22.14819204158573, "learning_rate": 6.878547105561863e-06, "loss": 3.0624, "step": 607 }, { "epoch": 0.20722563053851398, "grad_norm": 28.744363040752805, "learning_rate": 6.8898978433598185e-06, "loss": 2.8562, "step": 608 }, { "epoch": 0.20756646216768918, "grad_norm": 25.410481577215627, "learning_rate": 6.901248581157776e-06, "loss": 3.752, "step": 609 }, { "epoch": 0.20790729379686435, "grad_norm": 15.134889637275402, "learning_rate": 6.912599318955733e-06, "loss": 2.6328, "step": 610 }, { "epoch": 0.20824812542603954, "grad_norm": 31.79976657329084, "learning_rate": 6.92395005675369e-06, "loss": 3.3946, "step": 611 }, { "epoch": 0.2085889570552147, "grad_norm": 27.687031231491343, "learning_rate": 6.935300794551646e-06, "loss": 3.0865, "step": 612 }, { "epoch": 0.2089297886843899, "grad_norm": 40.862916583217206, "learning_rate": 6.946651532349603e-06, "loss": 3.5715, "step": 613 }, { "epoch": 0.2092706203135651, "grad_norm": 18.75215254358727, "learning_rate": 6.95800227014756e-06, "loss": 3.1755, "step": 614 }, { "epoch": 0.20961145194274028, "grad_norm": 30.292898763706404, "learning_rate": 6.9693530079455175e-06, "loss": 3.7768, "step": 615 }, { "epoch": 0.20995228357191548, "grad_norm": 13.881501658004648, "learning_rate": 6.980703745743473e-06, "loss": 2.8044, "step": 616 }, { "epoch": 0.21029311520109067, "grad_norm": 21.092000392401346, "learning_rate": 6.992054483541431e-06, "loss": 3.1219, "step": 617 }, { "epoch": 0.21063394683026584, "grad_norm": 26.58209487672545, "learning_rate": 7.003405221339387e-06, "loss": 2.8802, "step": 618 }, { "epoch": 0.21097477845944104, "grad_norm": 31.660388117348845, "learning_rate": 7.014755959137345e-06, "loss": 3.2726, "step": 619 }, { "epoch": 0.2113156100886162, "grad_norm": 24.023857227036515, "learning_rate": 7.026106696935301e-06, "loss": 3.5171, "step": 620 }, { "epoch": 0.2116564417177914, "grad_norm": 26.673484087347017, "learning_rate": 7.037457434733258e-06, "loss": 3.0256, "step": 621 }, { "epoch": 0.2119972733469666, "grad_norm": 30.92921940135245, "learning_rate": 7.048808172531215e-06, "loss": 3.5411, "step": 622 }, { "epoch": 0.21233810497614178, "grad_norm": 38.60252627194752, "learning_rate": 7.060158910329172e-06, "loss": 3.759, "step": 623 }, { "epoch": 0.21267893660531698, "grad_norm": 31.10064524411684, "learning_rate": 7.071509648127128e-06, "loss": 3.7815, "step": 624 }, { "epoch": 0.21301976823449217, "grad_norm": 39.91766960239971, "learning_rate": 7.0828603859250855e-06, "loss": 3.4385, "step": 625 }, { "epoch": 0.21336059986366734, "grad_norm": 14.943101718627684, "learning_rate": 7.094211123723043e-06, "loss": 2.6464, "step": 626 }, { "epoch": 0.21370143149284254, "grad_norm": 19.70803536364196, "learning_rate": 7.105561861521e-06, "loss": 3.0411, "step": 627 }, { "epoch": 0.2140422631220177, "grad_norm": 37.62224083588128, "learning_rate": 7.116912599318956e-06, "loss": 3.8128, "step": 628 }, { "epoch": 0.2143830947511929, "grad_norm": 37.576809504593655, "learning_rate": 7.128263337116913e-06, "loss": 3.7837, "step": 629 }, { "epoch": 0.2147239263803681, "grad_norm": 22.55171677882715, "learning_rate": 7.13961407491487e-06, "loss": 3.1353, "step": 630 }, { "epoch": 0.21506475800954328, "grad_norm": 36.21815291111241, "learning_rate": 7.150964812712827e-06, "loss": 3.5351, "step": 631 }, { "epoch": 0.21540558963871848, "grad_norm": 39.692900148232155, "learning_rate": 7.162315550510784e-06, "loss": 3.2352, "step": 632 }, { "epoch": 0.21574642126789367, "grad_norm": 32.82540343283596, "learning_rate": 7.17366628830874e-06, "loss": 3.8771, "step": 633 }, { "epoch": 0.21608725289706884, "grad_norm": 24.560724045206307, "learning_rate": 7.185017026106698e-06, "loss": 3.3772, "step": 634 }, { "epoch": 0.21642808452624404, "grad_norm": 29.950185048820973, "learning_rate": 7.196367763904654e-06, "loss": 3.9657, "step": 635 }, { "epoch": 0.2167689161554192, "grad_norm": 93.5947988373926, "learning_rate": 7.207718501702611e-06, "loss": 4.0873, "step": 636 }, { "epoch": 0.2171097477845944, "grad_norm": 54.64434237343563, "learning_rate": 7.219069239500568e-06, "loss": 3.1608, "step": 637 }, { "epoch": 0.2174505794137696, "grad_norm": 52.679036813503096, "learning_rate": 7.230419977298525e-06, "loss": 3.366, "step": 638 }, { "epoch": 0.21779141104294478, "grad_norm": 50.88533586750164, "learning_rate": 7.241770715096483e-06, "loss": 4.246, "step": 639 }, { "epoch": 0.21813224267211997, "grad_norm": 100.43471071529694, "learning_rate": 7.253121452894438e-06, "loss": 3.4288, "step": 640 }, { "epoch": 0.21847307430129517, "grad_norm": 32.295720899661305, "learning_rate": 7.264472190692395e-06, "loss": 4.2704, "step": 641 }, { "epoch": 0.21881390593047034, "grad_norm": 45.96512548500402, "learning_rate": 7.2758229284903525e-06, "loss": 3.3498, "step": 642 }, { "epoch": 0.21915473755964554, "grad_norm": 28.28620841578828, "learning_rate": 7.28717366628831e-06, "loss": 3.3084, "step": 643 }, { "epoch": 0.2194955691888207, "grad_norm": 25.409620232475053, "learning_rate": 7.298524404086266e-06, "loss": 3.4964, "step": 644 }, { "epoch": 0.2198364008179959, "grad_norm": 22.428681971089635, "learning_rate": 7.309875141884223e-06, "loss": 3.2854, "step": 645 }, { "epoch": 0.2201772324471711, "grad_norm": 22.928748643237626, "learning_rate": 7.32122587968218e-06, "loss": 3.404, "step": 646 }, { "epoch": 0.22051806407634628, "grad_norm": 21.043022530781105, "learning_rate": 7.332576617480137e-06, "loss": 3.2107, "step": 647 }, { "epoch": 0.22085889570552147, "grad_norm": 19.716945006988414, "learning_rate": 7.343927355278093e-06, "loss": 3.1192, "step": 648 }, { "epoch": 0.22119972733469667, "grad_norm": 17.286542988844342, "learning_rate": 7.355278093076051e-06, "loss": 2.9502, "step": 649 }, { "epoch": 0.22154055896387184, "grad_norm": 26.21972544307366, "learning_rate": 7.366628830874007e-06, "loss": 3.8412, "step": 650 }, { "epoch": 0.22188139059304704, "grad_norm": 31.10776378157114, "learning_rate": 7.377979568671965e-06, "loss": 2.7104, "step": 651 }, { "epoch": 0.2222222222222222, "grad_norm": 13.920521221873775, "learning_rate": 7.3893303064699205e-06, "loss": 3.0405, "step": 652 }, { "epoch": 0.2225630538513974, "grad_norm": 26.30217501825908, "learning_rate": 7.400681044267878e-06, "loss": 3.2028, "step": 653 }, { "epoch": 0.2229038854805726, "grad_norm": 38.52663870658809, "learning_rate": 7.412031782065835e-06, "loss": 3.0826, "step": 654 }, { "epoch": 0.22324471710974778, "grad_norm": 20.949763358462004, "learning_rate": 7.423382519863792e-06, "loss": 3.3962, "step": 655 }, { "epoch": 0.22358554873892297, "grad_norm": 20.39882608622639, "learning_rate": 7.434733257661748e-06, "loss": 3.3253, "step": 656 }, { "epoch": 0.22392638036809817, "grad_norm": 22.28491514603502, "learning_rate": 7.446083995459705e-06, "loss": 3.3699, "step": 657 }, { "epoch": 0.22426721199727334, "grad_norm": 28.94865453737843, "learning_rate": 7.457434733257663e-06, "loss": 3.2308, "step": 658 }, { "epoch": 0.22460804362644854, "grad_norm": 21.38604474315535, "learning_rate": 7.4687854710556195e-06, "loss": 3.2837, "step": 659 }, { "epoch": 0.2249488752556237, "grad_norm": 21.38499784066535, "learning_rate": 7.480136208853575e-06, "loss": 3.1846, "step": 660 }, { "epoch": 0.2252897068847989, "grad_norm": 22.93379307642456, "learning_rate": 7.491486946651533e-06, "loss": 3.141, "step": 661 }, { "epoch": 0.2256305385139741, "grad_norm": 30.304881382249665, "learning_rate": 7.50283768444949e-06, "loss": 3.4798, "step": 662 }, { "epoch": 0.22597137014314927, "grad_norm": 31.36002944797405, "learning_rate": 7.514188422247447e-06, "loss": 3.7983, "step": 663 }, { "epoch": 0.22631220177232447, "grad_norm": 26.097087673755233, "learning_rate": 7.5255391600454035e-06, "loss": 3.499, "step": 664 }, { "epoch": 0.22665303340149967, "grad_norm": 20.57046037909741, "learning_rate": 7.53688989784336e-06, "loss": 3.2229, "step": 665 }, { "epoch": 0.22699386503067484, "grad_norm": 47.562878709860364, "learning_rate": 7.5482406356413176e-06, "loss": 3.7525, "step": 666 }, { "epoch": 0.22733469665985004, "grad_norm": 27.390594888468755, "learning_rate": 7.559591373439274e-06, "loss": 2.6644, "step": 667 }, { "epoch": 0.2276755282890252, "grad_norm": 25.47541088784383, "learning_rate": 7.570942111237231e-06, "loss": 3.6583, "step": 668 }, { "epoch": 0.2280163599182004, "grad_norm": 29.155300987307562, "learning_rate": 7.5822928490351875e-06, "loss": 3.5922, "step": 669 }, { "epoch": 0.2283571915473756, "grad_norm": 31.10451556232216, "learning_rate": 7.593643586833145e-06, "loss": 3.5067, "step": 670 }, { "epoch": 0.22869802317655077, "grad_norm": 18.149757333009948, "learning_rate": 7.6049943246311016e-06, "loss": 3.1868, "step": 671 }, { "epoch": 0.22903885480572597, "grad_norm": 27.50169359632286, "learning_rate": 7.616345062429058e-06, "loss": 3.6593, "step": 672 }, { "epoch": 0.22937968643490117, "grad_norm": 16.656298612085447, "learning_rate": 7.627695800227015e-06, "loss": 2.9243, "step": 673 }, { "epoch": 0.22972051806407634, "grad_norm": 35.99538537724704, "learning_rate": 7.639046538024972e-06, "loss": 3.5115, "step": 674 }, { "epoch": 0.23006134969325154, "grad_norm": 21.238927116764263, "learning_rate": 7.650397275822929e-06, "loss": 3.653, "step": 675 }, { "epoch": 0.2304021813224267, "grad_norm": 24.789517952536414, "learning_rate": 7.661748013620886e-06, "loss": 3.3872, "step": 676 }, { "epoch": 0.2307430129516019, "grad_norm": 55.74348345838816, "learning_rate": 7.673098751418842e-06, "loss": 3.7864, "step": 677 }, { "epoch": 0.2310838445807771, "grad_norm": 43.076035949013175, "learning_rate": 7.6844494892168e-06, "loss": 3.107, "step": 678 }, { "epoch": 0.23142467620995227, "grad_norm": 29.924170179780113, "learning_rate": 7.695800227014757e-06, "loss": 4.0193, "step": 679 }, { "epoch": 0.23176550783912747, "grad_norm": 29.953359971908668, "learning_rate": 7.707150964812714e-06, "loss": 3.7907, "step": 680 }, { "epoch": 0.23210633946830267, "grad_norm": 52.94006135299579, "learning_rate": 7.71850170261067e-06, "loss": 3.2253, "step": 681 }, { "epoch": 0.23244717109747784, "grad_norm": 28.263883812667867, "learning_rate": 7.729852440408627e-06, "loss": 2.833, "step": 682 }, { "epoch": 0.23278800272665304, "grad_norm": 30.089251094826754, "learning_rate": 7.741203178206584e-06, "loss": 3.4134, "step": 683 }, { "epoch": 0.2331288343558282, "grad_norm": 30.094731514110844, "learning_rate": 7.75255391600454e-06, "loss": 2.7839, "step": 684 }, { "epoch": 0.2334696659850034, "grad_norm": 29.72830250491126, "learning_rate": 7.763904653802497e-06, "loss": 3.7319, "step": 685 }, { "epoch": 0.2338104976141786, "grad_norm": 18.0735291631865, "learning_rate": 7.775255391600455e-06, "loss": 3.071, "step": 686 }, { "epoch": 0.23415132924335377, "grad_norm": 23.88613291196941, "learning_rate": 7.786606129398412e-06, "loss": 3.0693, "step": 687 }, { "epoch": 0.23449216087252897, "grad_norm": 29.573233621949626, "learning_rate": 7.797956867196369e-06, "loss": 3.0225, "step": 688 }, { "epoch": 0.23483299250170417, "grad_norm": 29.542537263618925, "learning_rate": 7.809307604994325e-06, "loss": 3.1858, "step": 689 }, { "epoch": 0.23517382413087934, "grad_norm": 18.216259742446347, "learning_rate": 7.820658342792282e-06, "loss": 2.8649, "step": 690 }, { "epoch": 0.23551465576005454, "grad_norm": 125.65672187700817, "learning_rate": 7.83200908059024e-06, "loss": 3.3388, "step": 691 }, { "epoch": 0.23585548738922973, "grad_norm": 17.380752037557542, "learning_rate": 7.843359818388195e-06, "loss": 3.0402, "step": 692 }, { "epoch": 0.2361963190184049, "grad_norm": 23.594368754879273, "learning_rate": 7.854710556186152e-06, "loss": 2.6934, "step": 693 }, { "epoch": 0.2365371506475801, "grad_norm": 31.50893297161558, "learning_rate": 7.86606129398411e-06, "loss": 3.1054, "step": 694 }, { "epoch": 0.23687798227675527, "grad_norm": 33.8795403124054, "learning_rate": 7.877412031782067e-06, "loss": 3.3545, "step": 695 }, { "epoch": 0.23721881390593047, "grad_norm": 44.769421658207584, "learning_rate": 7.888762769580023e-06, "loss": 3.7756, "step": 696 }, { "epoch": 0.23755964553510567, "grad_norm": 28.234180617869896, "learning_rate": 7.90011350737798e-06, "loss": 3.6009, "step": 697 }, { "epoch": 0.23790047716428084, "grad_norm": 15.334225224695182, "learning_rate": 7.911464245175937e-06, "loss": 3.5085, "step": 698 }, { "epoch": 0.23824130879345604, "grad_norm": 21.915567193808116, "learning_rate": 7.922814982973895e-06, "loss": 3.002, "step": 699 }, { "epoch": 0.23858214042263123, "grad_norm": 34.603261266247074, "learning_rate": 7.93416572077185e-06, "loss": 3.5473, "step": 700 }, { "epoch": 0.2389229720518064, "grad_norm": 21.1563570463816, "learning_rate": 7.945516458569808e-06, "loss": 3.3214, "step": 701 }, { "epoch": 0.2392638036809816, "grad_norm": 39.49400937014496, "learning_rate": 7.956867196367765e-06, "loss": 3.5622, "step": 702 }, { "epoch": 0.23960463531015677, "grad_norm": 21.322113611123598, "learning_rate": 7.968217934165721e-06, "loss": 3.2879, "step": 703 }, { "epoch": 0.23994546693933197, "grad_norm": 40.04452731635841, "learning_rate": 7.979568671963678e-06, "loss": 3.6135, "step": 704 }, { "epoch": 0.24028629856850717, "grad_norm": 28.92576996822211, "learning_rate": 7.990919409761635e-06, "loss": 3.1345, "step": 705 }, { "epoch": 0.24062713019768234, "grad_norm": 24.673789690307704, "learning_rate": 8.002270147559591e-06, "loss": 3.6303, "step": 706 }, { "epoch": 0.24096796182685754, "grad_norm": 35.41563319616739, "learning_rate": 8.01362088535755e-06, "loss": 3.6358, "step": 707 }, { "epoch": 0.24130879345603273, "grad_norm": 26.08047689137244, "learning_rate": 8.024971623155505e-06, "loss": 3.5254, "step": 708 }, { "epoch": 0.2416496250852079, "grad_norm": 17.559255018922954, "learning_rate": 8.036322360953463e-06, "loss": 3.0724, "step": 709 }, { "epoch": 0.2419904567143831, "grad_norm": 31.95682148618894, "learning_rate": 8.04767309875142e-06, "loss": 3.7461, "step": 710 }, { "epoch": 0.24233128834355827, "grad_norm": 40.925147043952414, "learning_rate": 8.059023836549376e-06, "loss": 3.0832, "step": 711 }, { "epoch": 0.24267211997273347, "grad_norm": 20.63874100758961, "learning_rate": 8.070374574347333e-06, "loss": 3.402, "step": 712 }, { "epoch": 0.24301295160190867, "grad_norm": 18.715683220270982, "learning_rate": 8.08172531214529e-06, "loss": 3.4639, "step": 713 }, { "epoch": 0.24335378323108384, "grad_norm": 44.20355440433992, "learning_rate": 8.093076049943248e-06, "loss": 3.0679, "step": 714 }, { "epoch": 0.24369461486025903, "grad_norm": 31.826772524627636, "learning_rate": 8.104426787741204e-06, "loss": 3.7664, "step": 715 }, { "epoch": 0.24403544648943423, "grad_norm": 27.998862877527973, "learning_rate": 8.115777525539161e-06, "loss": 3.0858, "step": 716 }, { "epoch": 0.2443762781186094, "grad_norm": 29.09471705736548, "learning_rate": 8.127128263337118e-06, "loss": 3.2368, "step": 717 }, { "epoch": 0.2447171097477846, "grad_norm": 21.88657292660979, "learning_rate": 8.138479001135074e-06, "loss": 3.1833, "step": 718 }, { "epoch": 0.24505794137695977, "grad_norm": 18.544996823988352, "learning_rate": 8.149829738933031e-06, "loss": 3.0913, "step": 719 }, { "epoch": 0.24539877300613497, "grad_norm": 17.94552616844981, "learning_rate": 8.161180476730988e-06, "loss": 3.4685, "step": 720 }, { "epoch": 0.24573960463531017, "grad_norm": 20.94315313031552, "learning_rate": 8.172531214528944e-06, "loss": 3.1671, "step": 721 }, { "epoch": 0.24608043626448534, "grad_norm": 24.495557032183594, "learning_rate": 8.183881952326902e-06, "loss": 3.8512, "step": 722 }, { "epoch": 0.24642126789366053, "grad_norm": 22.53006209557111, "learning_rate": 8.195232690124859e-06, "loss": 3.7313, "step": 723 }, { "epoch": 0.24676209952283573, "grad_norm": 20.002518382137144, "learning_rate": 8.206583427922816e-06, "loss": 3.6776, "step": 724 }, { "epoch": 0.2471029311520109, "grad_norm": 22.114017817319763, "learning_rate": 8.217934165720772e-06, "loss": 3.4949, "step": 725 }, { "epoch": 0.2474437627811861, "grad_norm": 25.04085620931253, "learning_rate": 8.229284903518729e-06, "loss": 3.2238, "step": 726 }, { "epoch": 0.24778459441036127, "grad_norm": 27.126486716814522, "learning_rate": 8.240635641316687e-06, "loss": 3.1024, "step": 727 }, { "epoch": 0.24812542603953647, "grad_norm": 40.4970720660853, "learning_rate": 8.251986379114642e-06, "loss": 3.5811, "step": 728 }, { "epoch": 0.24846625766871167, "grad_norm": 23.270458870747998, "learning_rate": 8.2633371169126e-06, "loss": 2.7018, "step": 729 }, { "epoch": 0.24880708929788684, "grad_norm": 21.563893288169957, "learning_rate": 8.274687854710557e-06, "loss": 3.2195, "step": 730 }, { "epoch": 0.24914792092706203, "grad_norm": 21.969457282311005, "learning_rate": 8.286038592508514e-06, "loss": 3.5316, "step": 731 }, { "epoch": 0.24948875255623723, "grad_norm": 29.888320629441765, "learning_rate": 8.29738933030647e-06, "loss": 3.9979, "step": 732 }, { "epoch": 0.2498295841854124, "grad_norm": 21.275171828484144, "learning_rate": 8.308740068104427e-06, "loss": 3.0122, "step": 733 }, { "epoch": 0.2501704158145876, "grad_norm": 23.558357711598134, "learning_rate": 8.320090805902384e-06, "loss": 2.9943, "step": 734 }, { "epoch": 0.2505112474437628, "grad_norm": 25.754048516259058, "learning_rate": 8.331441543700342e-06, "loss": 3.1752, "step": 735 }, { "epoch": 0.250852079072938, "grad_norm": 31.869324476375695, "learning_rate": 8.342792281498297e-06, "loss": 3.7288, "step": 736 }, { "epoch": 0.25119291070211314, "grad_norm": 22.02338431337976, "learning_rate": 8.354143019296255e-06, "loss": 3.1557, "step": 737 }, { "epoch": 0.25153374233128833, "grad_norm": 22.15417353419875, "learning_rate": 8.365493757094212e-06, "loss": 3.178, "step": 738 }, { "epoch": 0.25187457396046353, "grad_norm": 28.587881771287908, "learning_rate": 8.376844494892169e-06, "loss": 2.9669, "step": 739 }, { "epoch": 0.25221540558963873, "grad_norm": 21.660486901847246, "learning_rate": 8.388195232690125e-06, "loss": 2.7981, "step": 740 }, { "epoch": 0.25255623721881393, "grad_norm": 21.464901129614255, "learning_rate": 8.399545970488082e-06, "loss": 3.2154, "step": 741 }, { "epoch": 0.25289706884798907, "grad_norm": 20.81353706994248, "learning_rate": 8.410896708286039e-06, "loss": 3.2952, "step": 742 }, { "epoch": 0.25323790047716427, "grad_norm": 20.67379945345823, "learning_rate": 8.422247446083997e-06, "loss": 2.9638, "step": 743 }, { "epoch": 0.25357873210633947, "grad_norm": 25.2845816012546, "learning_rate": 8.433598183881952e-06, "loss": 3.0447, "step": 744 }, { "epoch": 0.25391956373551466, "grad_norm": 29.181569750568247, "learning_rate": 8.44494892167991e-06, "loss": 3.9157, "step": 745 }, { "epoch": 0.25426039536468986, "grad_norm": 50.306047658897015, "learning_rate": 8.456299659477867e-06, "loss": 3.4021, "step": 746 }, { "epoch": 0.254601226993865, "grad_norm": 37.95104095783337, "learning_rate": 8.467650397275823e-06, "loss": 3.7127, "step": 747 }, { "epoch": 0.2549420586230402, "grad_norm": 16.867235090739403, "learning_rate": 8.47900113507378e-06, "loss": 2.9545, "step": 748 }, { "epoch": 0.2552828902522154, "grad_norm": 24.044200320036865, "learning_rate": 8.490351872871737e-06, "loss": 3.1379, "step": 749 }, { "epoch": 0.2556237218813906, "grad_norm": 23.68826881389898, "learning_rate": 8.501702610669695e-06, "loss": 3.6071, "step": 750 }, { "epoch": 0.2559645535105658, "grad_norm": 26.072628285910994, "learning_rate": 8.513053348467652e-06, "loss": 3.1413, "step": 751 }, { "epoch": 0.256305385139741, "grad_norm": 24.718537369704446, "learning_rate": 8.524404086265608e-06, "loss": 2.9534, "step": 752 }, { "epoch": 0.25664621676891614, "grad_norm": 18.016555004330048, "learning_rate": 8.535754824063565e-06, "loss": 3.076, "step": 753 }, { "epoch": 0.25698704839809133, "grad_norm": 26.854752188618, "learning_rate": 8.547105561861521e-06, "loss": 3.079, "step": 754 }, { "epoch": 0.25732788002726653, "grad_norm": 25.230622917445956, "learning_rate": 8.558456299659478e-06, "loss": 3.8375, "step": 755 }, { "epoch": 0.25766871165644173, "grad_norm": 15.914855786142633, "learning_rate": 8.569807037457435e-06, "loss": 3.1441, "step": 756 }, { "epoch": 0.2580095432856169, "grad_norm": 26.56339053004718, "learning_rate": 8.581157775255391e-06, "loss": 3.2563, "step": 757 }, { "epoch": 0.25835037491479207, "grad_norm": 24.89552674321171, "learning_rate": 8.59250851305335e-06, "loss": 3.2297, "step": 758 }, { "epoch": 0.25869120654396727, "grad_norm": 37.25610626482077, "learning_rate": 8.603859250851306e-06, "loss": 3.7545, "step": 759 }, { "epoch": 0.25903203817314246, "grad_norm": 35.79330991591616, "learning_rate": 8.615209988649263e-06, "loss": 3.3511, "step": 760 }, { "epoch": 0.25937286980231766, "grad_norm": 27.87096759657452, "learning_rate": 8.62656072644722e-06, "loss": 3.4937, "step": 761 }, { "epoch": 0.25971370143149286, "grad_norm": 32.72143582203686, "learning_rate": 8.637911464245176e-06, "loss": 3.7593, "step": 762 }, { "epoch": 0.260054533060668, "grad_norm": 27.985840306272664, "learning_rate": 8.649262202043135e-06, "loss": 3.6257, "step": 763 }, { "epoch": 0.2603953646898432, "grad_norm": 26.048307456705928, "learning_rate": 8.66061293984109e-06, "loss": 3.3931, "step": 764 }, { "epoch": 0.2607361963190184, "grad_norm": 32.094776243865624, "learning_rate": 8.671963677639048e-06, "loss": 3.0766, "step": 765 }, { "epoch": 0.2610770279481936, "grad_norm": 33.190822738472185, "learning_rate": 8.683314415437004e-06, "loss": 3.2532, "step": 766 }, { "epoch": 0.2614178595773688, "grad_norm": 26.666305515026433, "learning_rate": 8.694665153234961e-06, "loss": 3.7685, "step": 767 }, { "epoch": 0.261758691206544, "grad_norm": 22.69569420911664, "learning_rate": 8.706015891032918e-06, "loss": 3.0247, "step": 768 }, { "epoch": 0.26209952283571913, "grad_norm": 36.2068461132267, "learning_rate": 8.717366628830874e-06, "loss": 3.5706, "step": 769 }, { "epoch": 0.26244035446489433, "grad_norm": 25.11675597353937, "learning_rate": 8.728717366628831e-06, "loss": 3.671, "step": 770 }, { "epoch": 0.26278118609406953, "grad_norm": 22.2460131010649, "learning_rate": 8.74006810442679e-06, "loss": 2.4091, "step": 771 }, { "epoch": 0.2631220177232447, "grad_norm": 27.106951476016487, "learning_rate": 8.751418842224744e-06, "loss": 3.6041, "step": 772 }, { "epoch": 0.2634628493524199, "grad_norm": 19.88883884697766, "learning_rate": 8.762769580022703e-06, "loss": 3.405, "step": 773 }, { "epoch": 0.26380368098159507, "grad_norm": 20.50368807237646, "learning_rate": 8.77412031782066e-06, "loss": 3.4647, "step": 774 }, { "epoch": 0.26414451261077027, "grad_norm": 23.126077222692306, "learning_rate": 8.785471055618616e-06, "loss": 3.9059, "step": 775 }, { "epoch": 0.26448534423994546, "grad_norm": 18.0655313534014, "learning_rate": 8.796821793416572e-06, "loss": 3.5183, "step": 776 }, { "epoch": 0.26482617586912066, "grad_norm": 23.140956328999053, "learning_rate": 8.808172531214529e-06, "loss": 3.1886, "step": 777 }, { "epoch": 0.26516700749829586, "grad_norm": 41.110278540451354, "learning_rate": 8.819523269012486e-06, "loss": 3.7806, "step": 778 }, { "epoch": 0.26550783912747106, "grad_norm": 28.585057531119773, "learning_rate": 8.830874006810444e-06, "loss": 3.5836, "step": 779 }, { "epoch": 0.2658486707566462, "grad_norm": 42.78866346651865, "learning_rate": 8.842224744608399e-06, "loss": 3.6422, "step": 780 }, { "epoch": 0.2661895023858214, "grad_norm": 43.254943897727856, "learning_rate": 8.853575482406357e-06, "loss": 2.6481, "step": 781 }, { "epoch": 0.2665303340149966, "grad_norm": 15.383032910503957, "learning_rate": 8.864926220204314e-06, "loss": 3.2938, "step": 782 }, { "epoch": 0.2668711656441718, "grad_norm": 17.603301832141664, "learning_rate": 8.87627695800227e-06, "loss": 3.2174, "step": 783 }, { "epoch": 0.267211997273347, "grad_norm": 29.20666670769123, "learning_rate": 8.887627695800227e-06, "loss": 3.4076, "step": 784 }, { "epoch": 0.26755282890252213, "grad_norm": 24.7537573499683, "learning_rate": 8.898978433598184e-06, "loss": 3.6132, "step": 785 }, { "epoch": 0.26789366053169733, "grad_norm": 73.8550198315664, "learning_rate": 8.910329171396142e-06, "loss": 3.0391, "step": 786 }, { "epoch": 0.26823449216087253, "grad_norm": 24.528172210733576, "learning_rate": 8.921679909194099e-06, "loss": 3.5183, "step": 787 }, { "epoch": 0.2685753237900477, "grad_norm": 21.478137367927445, "learning_rate": 8.933030646992055e-06, "loss": 3.3609, "step": 788 }, { "epoch": 0.2689161554192229, "grad_norm": 19.54909258835435, "learning_rate": 8.944381384790012e-06, "loss": 2.866, "step": 789 }, { "epoch": 0.26925698704839807, "grad_norm": 26.87166298365148, "learning_rate": 8.955732122587969e-06, "loss": 3.4004, "step": 790 }, { "epoch": 0.26959781867757326, "grad_norm": 21.262880902795793, "learning_rate": 8.967082860385925e-06, "loss": 3.1228, "step": 791 }, { "epoch": 0.26993865030674846, "grad_norm": 24.79413190238954, "learning_rate": 8.978433598183882e-06, "loss": 3.0429, "step": 792 }, { "epoch": 0.27027948193592366, "grad_norm": 24.395933576878956, "learning_rate": 8.989784335981839e-06, "loss": 3.4952, "step": 793 }, { "epoch": 0.27062031356509886, "grad_norm": 23.91475960949721, "learning_rate": 9.001135073779797e-06, "loss": 3.2471, "step": 794 }, { "epoch": 0.27096114519427406, "grad_norm": 22.337045614116192, "learning_rate": 9.012485811577754e-06, "loss": 3.4552, "step": 795 }, { "epoch": 0.2713019768234492, "grad_norm": 28.976045213872876, "learning_rate": 9.02383654937571e-06, "loss": 3.8317, "step": 796 }, { "epoch": 0.2716428084526244, "grad_norm": 26.878664981423082, "learning_rate": 9.035187287173667e-06, "loss": 2.769, "step": 797 }, { "epoch": 0.2719836400817996, "grad_norm": 21.092339081213616, "learning_rate": 9.046538024971623e-06, "loss": 3.1567, "step": 798 }, { "epoch": 0.2723244717109748, "grad_norm": 21.27115420968662, "learning_rate": 9.057888762769582e-06, "loss": 3.3388, "step": 799 }, { "epoch": 0.27266530334015, "grad_norm": 21.334859332764378, "learning_rate": 9.069239500567537e-06, "loss": 2.7804, "step": 800 }, { "epoch": 0.27300613496932513, "grad_norm": 20.086611157320537, "learning_rate": 9.080590238365495e-06, "loss": 3.1492, "step": 801 }, { "epoch": 0.27334696659850033, "grad_norm": 19.590253905148945, "learning_rate": 9.091940976163452e-06, "loss": 3.2286, "step": 802 }, { "epoch": 0.2736877982276755, "grad_norm": 23.377639096329997, "learning_rate": 9.103291713961408e-06, "loss": 3.06, "step": 803 }, { "epoch": 0.2740286298568507, "grad_norm": 23.25023518585151, "learning_rate": 9.114642451759365e-06, "loss": 3.3556, "step": 804 }, { "epoch": 0.2743694614860259, "grad_norm": 33.19832442235924, "learning_rate": 9.125993189557322e-06, "loss": 3.1801, "step": 805 }, { "epoch": 0.27471029311520107, "grad_norm": 23.267995647419923, "learning_rate": 9.137343927355278e-06, "loss": 2.8149, "step": 806 }, { "epoch": 0.27505112474437626, "grad_norm": 18.469309658464905, "learning_rate": 9.148694665153237e-06, "loss": 2.9582, "step": 807 }, { "epoch": 0.27539195637355146, "grad_norm": 20.6039674662935, "learning_rate": 9.160045402951191e-06, "loss": 3.4269, "step": 808 }, { "epoch": 0.27573278800272666, "grad_norm": 24.87941854774333, "learning_rate": 9.17139614074915e-06, "loss": 3.6045, "step": 809 }, { "epoch": 0.27607361963190186, "grad_norm": 39.90973855651112, "learning_rate": 9.182746878547106e-06, "loss": 4.0639, "step": 810 }, { "epoch": 0.27641445126107705, "grad_norm": 23.85839114983585, "learning_rate": 9.194097616345063e-06, "loss": 3.0495, "step": 811 }, { "epoch": 0.2767552828902522, "grad_norm": 26.638642085275748, "learning_rate": 9.20544835414302e-06, "loss": 3.8556, "step": 812 }, { "epoch": 0.2770961145194274, "grad_norm": 26.22603006923388, "learning_rate": 9.216799091940976e-06, "loss": 3.8839, "step": 813 }, { "epoch": 0.2774369461486026, "grad_norm": 28.38764329066922, "learning_rate": 9.228149829738935e-06, "loss": 3.6359, "step": 814 }, { "epoch": 0.2777777777777778, "grad_norm": 27.438949154535752, "learning_rate": 9.239500567536891e-06, "loss": 3.4608, "step": 815 }, { "epoch": 0.278118609406953, "grad_norm": 29.546208093223125, "learning_rate": 9.250851305334846e-06, "loss": 3.4507, "step": 816 }, { "epoch": 0.27845944103612813, "grad_norm": 36.0313703745622, "learning_rate": 9.262202043132805e-06, "loss": 3.3545, "step": 817 }, { "epoch": 0.27880027266530333, "grad_norm": 22.997000976953416, "learning_rate": 9.273552780930761e-06, "loss": 3.2352, "step": 818 }, { "epoch": 0.2791411042944785, "grad_norm": 19.414012546910165, "learning_rate": 9.284903518728718e-06, "loss": 2.737, "step": 819 }, { "epoch": 0.2794819359236537, "grad_norm": 31.5413361385869, "learning_rate": 9.296254256526674e-06, "loss": 3.2417, "step": 820 }, { "epoch": 0.2798227675528289, "grad_norm": 15.827403673405787, "learning_rate": 9.307604994324631e-06, "loss": 2.9977, "step": 821 }, { "epoch": 0.28016359918200406, "grad_norm": 20.82469606006344, "learning_rate": 9.31895573212259e-06, "loss": 3.2735, "step": 822 }, { "epoch": 0.28050443081117926, "grad_norm": 32.20046594480221, "learning_rate": 9.330306469920546e-06, "loss": 3.2921, "step": 823 }, { "epoch": 0.28084526244035446, "grad_norm": 26.362909913318603, "learning_rate": 9.341657207718503e-06, "loss": 3.5835, "step": 824 }, { "epoch": 0.28118609406952966, "grad_norm": 22.84990659985345, "learning_rate": 9.35300794551646e-06, "loss": 3.0717, "step": 825 }, { "epoch": 0.28152692569870486, "grad_norm": 27.2027280666498, "learning_rate": 9.364358683314416e-06, "loss": 3.5037, "step": 826 }, { "epoch": 0.28186775732788005, "grad_norm": 29.798198940830076, "learning_rate": 9.375709421112373e-06, "loss": 4.0117, "step": 827 }, { "epoch": 0.2822085889570552, "grad_norm": 20.642400723301353, "learning_rate": 9.38706015891033e-06, "loss": 3.429, "step": 828 }, { "epoch": 0.2825494205862304, "grad_norm": 32.04416312412705, "learning_rate": 9.398410896708286e-06, "loss": 3.2567, "step": 829 }, { "epoch": 0.2828902522154056, "grad_norm": 22.52104807528634, "learning_rate": 9.409761634506244e-06, "loss": 3.5261, "step": 830 }, { "epoch": 0.2832310838445808, "grad_norm": 48.220650181790575, "learning_rate": 9.4211123723042e-06, "loss": 3.5013, "step": 831 }, { "epoch": 0.283571915473756, "grad_norm": 21.05062457771934, "learning_rate": 9.432463110102157e-06, "loss": 3.3212, "step": 832 }, { "epoch": 0.28391274710293113, "grad_norm": 13.087044900477736, "learning_rate": 9.443813847900114e-06, "loss": 2.6118, "step": 833 }, { "epoch": 0.2842535787321063, "grad_norm": 36.01934218552081, "learning_rate": 9.45516458569807e-06, "loss": 3.0547, "step": 834 }, { "epoch": 0.2845944103612815, "grad_norm": 29.59387842751995, "learning_rate": 9.466515323496029e-06, "loss": 3.3447, "step": 835 }, { "epoch": 0.2849352419904567, "grad_norm": 30.59072072344251, "learning_rate": 9.477866061293984e-06, "loss": 3.8825, "step": 836 }, { "epoch": 0.2852760736196319, "grad_norm": 19.260491796504766, "learning_rate": 9.489216799091942e-06, "loss": 3.2133, "step": 837 }, { "epoch": 0.28561690524880706, "grad_norm": 20.63171930742733, "learning_rate": 9.500567536889899e-06, "loss": 3.2055, "step": 838 }, { "epoch": 0.28595773687798226, "grad_norm": 20.245988624402422, "learning_rate": 9.511918274687855e-06, "loss": 2.9113, "step": 839 }, { "epoch": 0.28629856850715746, "grad_norm": 28.38871079241072, "learning_rate": 9.523269012485812e-06, "loss": 3.4454, "step": 840 }, { "epoch": 0.28663940013633266, "grad_norm": 15.018151311228648, "learning_rate": 9.534619750283769e-06, "loss": 3.1854, "step": 841 }, { "epoch": 0.28698023176550785, "grad_norm": 23.20153117378376, "learning_rate": 9.545970488081725e-06, "loss": 3.5299, "step": 842 }, { "epoch": 0.28732106339468305, "grad_norm": 25.978778297406183, "learning_rate": 9.557321225879684e-06, "loss": 3.1834, "step": 843 }, { "epoch": 0.2876618950238582, "grad_norm": 14.633036434579006, "learning_rate": 9.568671963677639e-06, "loss": 3.0366, "step": 844 }, { "epoch": 0.2880027266530334, "grad_norm": 42.84383409980026, "learning_rate": 9.580022701475597e-06, "loss": 3.532, "step": 845 }, { "epoch": 0.2883435582822086, "grad_norm": 23.954226311640735, "learning_rate": 9.591373439273554e-06, "loss": 3.5636, "step": 846 }, { "epoch": 0.2886843899113838, "grad_norm": 27.974792253005543, "learning_rate": 9.60272417707151e-06, "loss": 4.2702, "step": 847 }, { "epoch": 0.289025221540559, "grad_norm": 25.27620756189969, "learning_rate": 9.614074914869467e-06, "loss": 3.5109, "step": 848 }, { "epoch": 0.2893660531697341, "grad_norm": 37.07533122664887, "learning_rate": 9.625425652667424e-06, "loss": 3.6019, "step": 849 }, { "epoch": 0.2897068847989093, "grad_norm": 18.141635217799177, "learning_rate": 9.636776390465382e-06, "loss": 3.1653, "step": 850 }, { "epoch": 0.2900477164280845, "grad_norm": 16.408363596755482, "learning_rate": 9.648127128263338e-06, "loss": 3.4963, "step": 851 }, { "epoch": 0.2903885480572597, "grad_norm": 21.487261945576247, "learning_rate": 9.659477866061293e-06, "loss": 3.7555, "step": 852 }, { "epoch": 0.2907293796864349, "grad_norm": 21.814628428118155, "learning_rate": 9.670828603859252e-06, "loss": 3.3467, "step": 853 }, { "epoch": 0.29107021131561006, "grad_norm": 20.258145801585798, "learning_rate": 9.682179341657208e-06, "loss": 3.4661, "step": 854 }, { "epoch": 0.29141104294478526, "grad_norm": 16.00319387546845, "learning_rate": 9.693530079455165e-06, "loss": 3.3164, "step": 855 }, { "epoch": 0.29175187457396046, "grad_norm": 25.689939710866422, "learning_rate": 9.704880817253122e-06, "loss": 3.4663, "step": 856 }, { "epoch": 0.29209270620313565, "grad_norm": 26.822581954837638, "learning_rate": 9.716231555051078e-06, "loss": 3.1245, "step": 857 }, { "epoch": 0.29243353783231085, "grad_norm": 16.073062107850657, "learning_rate": 9.727582292849037e-06, "loss": 3.213, "step": 858 }, { "epoch": 0.29277436946148605, "grad_norm": 14.2032218597238, "learning_rate": 9.738933030646993e-06, "loss": 2.9584, "step": 859 }, { "epoch": 0.2931152010906612, "grad_norm": 22.487763438790918, "learning_rate": 9.75028376844495e-06, "loss": 3.2377, "step": 860 }, { "epoch": 0.2934560327198364, "grad_norm": 33.2252626373012, "learning_rate": 9.761634506242906e-06, "loss": 3.0383, "step": 861 }, { "epoch": 0.2937968643490116, "grad_norm": 27.33347190209937, "learning_rate": 9.772985244040863e-06, "loss": 3.5986, "step": 862 }, { "epoch": 0.2941376959781868, "grad_norm": 19.633427008685462, "learning_rate": 9.784335981838821e-06, "loss": 3.4734, "step": 863 }, { "epoch": 0.294478527607362, "grad_norm": 34.268797429388925, "learning_rate": 9.795686719636776e-06, "loss": 3.5889, "step": 864 }, { "epoch": 0.2948193592365371, "grad_norm": 25.08698067517474, "learning_rate": 9.807037457434733e-06, "loss": 3.1935, "step": 865 }, { "epoch": 0.2951601908657123, "grad_norm": 17.221767039952358, "learning_rate": 9.818388195232691e-06, "loss": 2.6544, "step": 866 }, { "epoch": 0.2955010224948875, "grad_norm": 22.754572200380267, "learning_rate": 9.829738933030648e-06, "loss": 3.7041, "step": 867 }, { "epoch": 0.2958418541240627, "grad_norm": 21.84309905599954, "learning_rate": 9.841089670828605e-06, "loss": 3.744, "step": 868 }, { "epoch": 0.2961826857532379, "grad_norm": 30.591901639534964, "learning_rate": 9.852440408626561e-06, "loss": 4.1962, "step": 869 }, { "epoch": 0.2965235173824131, "grad_norm": 18.108910996630414, "learning_rate": 9.863791146424518e-06, "loss": 3.255, "step": 870 }, { "epoch": 0.29686434901158826, "grad_norm": 25.604291529883042, "learning_rate": 9.875141884222476e-06, "loss": 3.4427, "step": 871 }, { "epoch": 0.29720518064076346, "grad_norm": 23.01863541949127, "learning_rate": 9.886492622020431e-06, "loss": 3.4422, "step": 872 }, { "epoch": 0.29754601226993865, "grad_norm": 20.387655041498515, "learning_rate": 9.89784335981839e-06, "loss": 3.3177, "step": 873 }, { "epoch": 0.29788684389911385, "grad_norm": 27.19500461419769, "learning_rate": 9.909194097616346e-06, "loss": 3.5902, "step": 874 }, { "epoch": 0.29822767552828905, "grad_norm": 17.8403695225531, "learning_rate": 9.920544835414303e-06, "loss": 3.223, "step": 875 }, { "epoch": 0.2985685071574642, "grad_norm": 29.3436181441884, "learning_rate": 9.93189557321226e-06, "loss": 3.3883, "step": 876 }, { "epoch": 0.2989093387866394, "grad_norm": 24.24646161719558, "learning_rate": 9.943246311010216e-06, "loss": 3.4357, "step": 877 }, { "epoch": 0.2992501704158146, "grad_norm": 34.524104002392875, "learning_rate": 9.954597048808173e-06, "loss": 3.4251, "step": 878 }, { "epoch": 0.2995910020449898, "grad_norm": 20.988087700741243, "learning_rate": 9.965947786606131e-06, "loss": 3.3922, "step": 879 }, { "epoch": 0.299931833674165, "grad_norm": 25.41350666440404, "learning_rate": 9.977298524404086e-06, "loss": 3.0033, "step": 880 }, { "epoch": 0.3002726653033401, "grad_norm": 47.64637761170976, "learning_rate": 9.988649262202044e-06, "loss": 3.7157, "step": 881 }, { "epoch": 0.3006134969325153, "grad_norm": 25.563287557803758, "learning_rate": 1e-05, "loss": 3.6178, "step": 882 }, { "epoch": 0.3009543285616905, "grad_norm": 19.621235606484483, "learning_rate": 9.999999606740049e-06, "loss": 3.6751, "step": 883 }, { "epoch": 0.3012951601908657, "grad_norm": 16.973824092381143, "learning_rate": 9.999998426960256e-06, "loss": 3.3814, "step": 884 }, { "epoch": 0.3016359918200409, "grad_norm": 22.449226600292146, "learning_rate": 9.999996460660805e-06, "loss": 3.4696, "step": 885 }, { "epoch": 0.3019768234492161, "grad_norm": 19.527031863127327, "learning_rate": 9.999993707842007e-06, "loss": 3.3002, "step": 886 }, { "epoch": 0.30231765507839126, "grad_norm": 44.945285037916015, "learning_rate": 9.999990168504296e-06, "loss": 3.6489, "step": 887 }, { "epoch": 0.30265848670756645, "grad_norm": 24.973038773254128, "learning_rate": 9.999985842648228e-06, "loss": 2.759, "step": 888 }, { "epoch": 0.30299931833674165, "grad_norm": 25.900935310061467, "learning_rate": 9.999980730274481e-06, "loss": 3.1809, "step": 889 }, { "epoch": 0.30334014996591685, "grad_norm": 23.144770439947493, "learning_rate": 9.999974831383862e-06, "loss": 3.5488, "step": 890 }, { "epoch": 0.30368098159509205, "grad_norm": 25.83270721382352, "learning_rate": 9.9999681459773e-06, "loss": 3.8451, "step": 891 }, { "epoch": 0.3040218132242672, "grad_norm": 17.536764377187176, "learning_rate": 9.999960674055844e-06, "loss": 3.1129, "step": 892 }, { "epoch": 0.3043626448534424, "grad_norm": 35.28931840289613, "learning_rate": 9.999952415620671e-06, "loss": 3.8822, "step": 893 }, { "epoch": 0.3047034764826176, "grad_norm": 20.49798965442751, "learning_rate": 9.999943370673078e-06, "loss": 3.2214, "step": 894 }, { "epoch": 0.3050443081117928, "grad_norm": 23.574449554632288, "learning_rate": 9.99993353921449e-06, "loss": 3.107, "step": 895 }, { "epoch": 0.305385139740968, "grad_norm": 32.37301565899288, "learning_rate": 9.999922921246453e-06, "loss": 3.6538, "step": 896 }, { "epoch": 0.3057259713701431, "grad_norm": 26.78641335981452, "learning_rate": 9.999911516770634e-06, "loss": 3.8318, "step": 897 }, { "epoch": 0.3060668029993183, "grad_norm": 32.1313320443883, "learning_rate": 9.999899325788834e-06, "loss": 3.5537, "step": 898 }, { "epoch": 0.3064076346284935, "grad_norm": 15.858855823165637, "learning_rate": 9.999886348302966e-06, "loss": 2.7203, "step": 899 }, { "epoch": 0.3067484662576687, "grad_norm": 21.89642099857721, "learning_rate": 9.99987258431507e-06, "loss": 3.198, "step": 900 }, { "epoch": 0.3070892978868439, "grad_norm": 20.50897435051379, "learning_rate": 9.999858033827313e-06, "loss": 3.2204, "step": 901 }, { "epoch": 0.3074301295160191, "grad_norm": 19.39467710260482, "learning_rate": 9.999842696841987e-06, "loss": 3.341, "step": 902 }, { "epoch": 0.30777096114519426, "grad_norm": 31.022873251801197, "learning_rate": 9.999826573361498e-06, "loss": 3.4892, "step": 903 }, { "epoch": 0.30811179277436945, "grad_norm": 24.855787856412118, "learning_rate": 9.999809663388386e-06, "loss": 3.2009, "step": 904 }, { "epoch": 0.30845262440354465, "grad_norm": 23.546815568395473, "learning_rate": 9.999791966925313e-06, "loss": 3.4443, "step": 905 }, { "epoch": 0.30879345603271985, "grad_norm": 18.5829572103646, "learning_rate": 9.999773483975061e-06, "loss": 3.5261, "step": 906 }, { "epoch": 0.30913428766189505, "grad_norm": 36.28576401451518, "learning_rate": 9.999754214540536e-06, "loss": 3.1505, "step": 907 }, { "epoch": 0.3094751192910702, "grad_norm": 16.2909535599977, "learning_rate": 9.99973415862477e-06, "loss": 2.6659, "step": 908 }, { "epoch": 0.3098159509202454, "grad_norm": 60.199884046060866, "learning_rate": 9.999713316230919e-06, "loss": 3.0563, "step": 909 }, { "epoch": 0.3101567825494206, "grad_norm": 19.312308109819764, "learning_rate": 9.999691687362258e-06, "loss": 3.7053, "step": 910 }, { "epoch": 0.3104976141785958, "grad_norm": 31.80731808113836, "learning_rate": 9.999669272022194e-06, "loss": 3.8509, "step": 911 }, { "epoch": 0.310838445807771, "grad_norm": 26.015822544054206, "learning_rate": 9.99964607021425e-06, "loss": 3.6209, "step": 912 }, { "epoch": 0.3111792774369461, "grad_norm": 24.694038163948807, "learning_rate": 9.99962208194208e-06, "loss": 3.4148, "step": 913 }, { "epoch": 0.3115201090661213, "grad_norm": 50.94538943517919, "learning_rate": 9.99959730720945e-06, "loss": 3.7067, "step": 914 }, { "epoch": 0.3118609406952965, "grad_norm": 19.830575864042817, "learning_rate": 9.999571746020264e-06, "loss": 3.4052, "step": 915 }, { "epoch": 0.3122017723244717, "grad_norm": 38.78441278709003, "learning_rate": 9.999545398378538e-06, "loss": 3.3276, "step": 916 }, { "epoch": 0.3125426039536469, "grad_norm": 17.306379258605357, "learning_rate": 9.99951826428842e-06, "loss": 3.1559, "step": 917 }, { "epoch": 0.3128834355828221, "grad_norm": 24.38371517237406, "learning_rate": 9.999490343754177e-06, "loss": 3.2217, "step": 918 }, { "epoch": 0.31322426721199725, "grad_norm": 21.742398537496943, "learning_rate": 9.999461636780201e-06, "loss": 2.9896, "step": 919 }, { "epoch": 0.31356509884117245, "grad_norm": 17.992825848211677, "learning_rate": 9.999432143371008e-06, "loss": 3.5454, "step": 920 }, { "epoch": 0.31390593047034765, "grad_norm": 16.7574433279547, "learning_rate": 9.999401863531237e-06, "loss": 3.3121, "step": 921 }, { "epoch": 0.31424676209952285, "grad_norm": 23.917050259437865, "learning_rate": 9.999370797265653e-06, "loss": 3.2039, "step": 922 }, { "epoch": 0.31458759372869805, "grad_norm": 16.061624809714367, "learning_rate": 9.99933894457914e-06, "loss": 3.3809, "step": 923 }, { "epoch": 0.3149284253578732, "grad_norm": 16.239659569723802, "learning_rate": 9.999306305476709e-06, "loss": 2.8577, "step": 924 }, { "epoch": 0.3152692569870484, "grad_norm": 20.16704851148129, "learning_rate": 9.999272879963497e-06, "loss": 3.2859, "step": 925 }, { "epoch": 0.3156100886162236, "grad_norm": 40.40588521156014, "learning_rate": 9.999238668044758e-06, "loss": 3.2831, "step": 926 }, { "epoch": 0.3159509202453988, "grad_norm": 16.895512865929234, "learning_rate": 9.999203669725877e-06, "loss": 3.2776, "step": 927 }, { "epoch": 0.316291751874574, "grad_norm": 25.61638382704822, "learning_rate": 9.999167885012357e-06, "loss": 3.6473, "step": 928 }, { "epoch": 0.3166325835037491, "grad_norm": 26.154040418672754, "learning_rate": 9.99913131390983e-06, "loss": 3.4091, "step": 929 }, { "epoch": 0.3169734151329243, "grad_norm": 27.239936767315605, "learning_rate": 9.999093956424044e-06, "loss": 3.4799, "step": 930 }, { "epoch": 0.3173142467620995, "grad_norm": 18.036041660163733, "learning_rate": 9.99905581256088e-06, "loss": 3.6383, "step": 931 }, { "epoch": 0.3176550783912747, "grad_norm": 22.143897398083077, "learning_rate": 9.999016882326338e-06, "loss": 3.7367, "step": 932 }, { "epoch": 0.3179959100204499, "grad_norm": 19.8129549041829, "learning_rate": 9.998977165726537e-06, "loss": 3.3933, "step": 933 }, { "epoch": 0.3183367416496251, "grad_norm": 20.218098067652143, "learning_rate": 9.998936662767732e-06, "loss": 3.4046, "step": 934 }, { "epoch": 0.31867757327880025, "grad_norm": 32.175965291488076, "learning_rate": 9.998895373456287e-06, "loss": 3.7336, "step": 935 }, { "epoch": 0.31901840490797545, "grad_norm": 15.444811121662028, "learning_rate": 9.998853297798703e-06, "loss": 3.3616, "step": 936 }, { "epoch": 0.31935923653715065, "grad_norm": 19.117116529845763, "learning_rate": 9.998810435801594e-06, "loss": 3.1808, "step": 937 }, { "epoch": 0.31970006816632585, "grad_norm": 17.520032265709165, "learning_rate": 9.998766787471705e-06, "loss": 3.2491, "step": 938 }, { "epoch": 0.32004089979550104, "grad_norm": 25.868935992525177, "learning_rate": 9.998722352815902e-06, "loss": 3.3832, "step": 939 }, { "epoch": 0.3203817314246762, "grad_norm": 18.08707943829483, "learning_rate": 9.998677131841175e-06, "loss": 3.0646, "step": 940 }, { "epoch": 0.3207225630538514, "grad_norm": 21.300425761289215, "learning_rate": 9.998631124554634e-06, "loss": 3.1914, "step": 941 }, { "epoch": 0.3210633946830266, "grad_norm": 21.950415459436282, "learning_rate": 9.99858433096352e-06, "loss": 2.8182, "step": 942 }, { "epoch": 0.3214042263122018, "grad_norm": 18.096751988766083, "learning_rate": 9.998536751075191e-06, "loss": 3.1606, "step": 943 }, { "epoch": 0.321745057941377, "grad_norm": 16.529596246647436, "learning_rate": 9.998488384897136e-06, "loss": 3.3708, "step": 944 }, { "epoch": 0.3220858895705521, "grad_norm": 38.82901907644897, "learning_rate": 9.998439232436958e-06, "loss": 3.7399, "step": 945 }, { "epoch": 0.3224267211997273, "grad_norm": 22.553126085750584, "learning_rate": 9.998389293702393e-06, "loss": 3.1169, "step": 946 }, { "epoch": 0.3227675528289025, "grad_norm": 19.868806926552562, "learning_rate": 9.998338568701293e-06, "loss": 2.963, "step": 947 }, { "epoch": 0.3231083844580777, "grad_norm": 21.484504978071534, "learning_rate": 9.998287057441641e-06, "loss": 3.3558, "step": 948 }, { "epoch": 0.3234492160872529, "grad_norm": 27.179275940719307, "learning_rate": 9.998234759931536e-06, "loss": 3.6229, "step": 949 }, { "epoch": 0.3237900477164281, "grad_norm": 26.407650505664417, "learning_rate": 9.998181676179205e-06, "loss": 3.9817, "step": 950 }, { "epoch": 0.32413087934560325, "grad_norm": 17.195563626263972, "learning_rate": 9.998127806193002e-06, "loss": 3.219, "step": 951 }, { "epoch": 0.32447171097477845, "grad_norm": 17.99645647069595, "learning_rate": 9.998073149981398e-06, "loss": 3.0451, "step": 952 }, { "epoch": 0.32481254260395365, "grad_norm": 20.64305654598908, "learning_rate": 9.998017707552992e-06, "loss": 2.9757, "step": 953 }, { "epoch": 0.32515337423312884, "grad_norm": 23.418652641809388, "learning_rate": 9.997961478916504e-06, "loss": 3.383, "step": 954 }, { "epoch": 0.32549420586230404, "grad_norm": 16.556362490162357, "learning_rate": 9.997904464080779e-06, "loss": 3.1277, "step": 955 }, { "epoch": 0.3258350374914792, "grad_norm": 13.392588666148562, "learning_rate": 9.997846663054786e-06, "loss": 2.9564, "step": 956 }, { "epoch": 0.3261758691206544, "grad_norm": 25.41985879051928, "learning_rate": 9.99778807584762e-06, "loss": 3.5963, "step": 957 }, { "epoch": 0.3265167007498296, "grad_norm": 19.8072567300104, "learning_rate": 9.997728702468493e-06, "loss": 3.6759, "step": 958 }, { "epoch": 0.3268575323790048, "grad_norm": 19.67160482760699, "learning_rate": 9.997668542926747e-06, "loss": 3.6193, "step": 959 }, { "epoch": 0.32719836400818, "grad_norm": 22.235335556172085, "learning_rate": 9.997607597231845e-06, "loss": 3.7544, "step": 960 }, { "epoch": 0.3275391956373552, "grad_norm": 25.072060075868396, "learning_rate": 9.997545865393373e-06, "loss": 3.1102, "step": 961 }, { "epoch": 0.3278800272665303, "grad_norm": 35.87604644050223, "learning_rate": 9.997483347421042e-06, "loss": 3.5712, "step": 962 }, { "epoch": 0.3282208588957055, "grad_norm": 18.47470784682449, "learning_rate": 9.997420043324687e-06, "loss": 3.1538, "step": 963 }, { "epoch": 0.3285616905248807, "grad_norm": 22.297708871385638, "learning_rate": 9.997355953114265e-06, "loss": 3.5527, "step": 964 }, { "epoch": 0.3289025221540559, "grad_norm": 23.264492087543953, "learning_rate": 9.997291076799858e-06, "loss": 3.1725, "step": 965 }, { "epoch": 0.3292433537832311, "grad_norm": 18.24215283505048, "learning_rate": 9.997225414391672e-06, "loss": 3.2788, "step": 966 }, { "epoch": 0.32958418541240625, "grad_norm": 16.200895207611723, "learning_rate": 9.997158965900035e-06, "loss": 3.3053, "step": 967 }, { "epoch": 0.32992501704158145, "grad_norm": 21.705154378363925, "learning_rate": 9.9970917313354e-06, "loss": 3.3499, "step": 968 }, { "epoch": 0.33026584867075665, "grad_norm": 32.12027799792602, "learning_rate": 9.997023710708345e-06, "loss": 4.0311, "step": 969 }, { "epoch": 0.33060668029993184, "grad_norm": 21.03514301831359, "learning_rate": 9.996954904029567e-06, "loss": 2.8821, "step": 970 }, { "epoch": 0.33094751192910704, "grad_norm": 38.19882425236756, "learning_rate": 9.996885311309892e-06, "loss": 3.8124, "step": 971 }, { "epoch": 0.3312883435582822, "grad_norm": 27.310042903493017, "learning_rate": 9.996814932560267e-06, "loss": 3.9093, "step": 972 }, { "epoch": 0.3316291751874574, "grad_norm": 28.072751961723945, "learning_rate": 9.99674376779176e-06, "loss": 3.3036, "step": 973 }, { "epoch": 0.3319700068166326, "grad_norm": 31.62206738581211, "learning_rate": 9.996671817015566e-06, "loss": 4.0746, "step": 974 }, { "epoch": 0.3323108384458078, "grad_norm": 26.650979341128934, "learning_rate": 9.996599080243006e-06, "loss": 3.2166, "step": 975 }, { "epoch": 0.332651670074983, "grad_norm": 28.973313632973767, "learning_rate": 9.996525557485522e-06, "loss": 3.7695, "step": 976 }, { "epoch": 0.3329925017041582, "grad_norm": 22.597452411932505, "learning_rate": 9.996451248754677e-06, "loss": 3.8569, "step": 977 }, { "epoch": 0.3333333333333333, "grad_norm": 22.838360096890366, "learning_rate": 9.99637615406216e-06, "loss": 3.4959, "step": 978 }, { "epoch": 0.3336741649625085, "grad_norm": 22.617214888476887, "learning_rate": 9.996300273419785e-06, "loss": 2.9747, "step": 979 }, { "epoch": 0.3340149965916837, "grad_norm": 19.244857113410877, "learning_rate": 9.996223606839488e-06, "loss": 3.7922, "step": 980 }, { "epoch": 0.3343558282208589, "grad_norm": 14.825928009453055, "learning_rate": 9.996146154333328e-06, "loss": 2.9686, "step": 981 }, { "epoch": 0.3346966598500341, "grad_norm": 22.373970259865008, "learning_rate": 9.996067915913489e-06, "loss": 4.0649, "step": 982 }, { "epoch": 0.33503749147920925, "grad_norm": 23.846791622542362, "learning_rate": 9.99598889159228e-06, "loss": 3.6298, "step": 983 }, { "epoch": 0.33537832310838445, "grad_norm": 24.198705449201203, "learning_rate": 9.995909081382131e-06, "loss": 2.9951, "step": 984 }, { "epoch": 0.33571915473755964, "grad_norm": 19.89617163897643, "learning_rate": 9.995828485295596e-06, "loss": 3.22, "step": 985 }, { "epoch": 0.33605998636673484, "grad_norm": 15.527251838920543, "learning_rate": 9.995747103345352e-06, "loss": 3.3982, "step": 986 }, { "epoch": 0.33640081799591004, "grad_norm": 22.942905569699253, "learning_rate": 9.9956649355442e-06, "loss": 3.5283, "step": 987 }, { "epoch": 0.3367416496250852, "grad_norm": 20.707743760280447, "learning_rate": 9.99558198190507e-06, "loss": 3.2543, "step": 988 }, { "epoch": 0.3370824812542604, "grad_norm": 23.227905746589638, "learning_rate": 9.995498242441006e-06, "loss": 3.1755, "step": 989 }, { "epoch": 0.3374233128834356, "grad_norm": 23.975459987042164, "learning_rate": 9.995413717165183e-06, "loss": 3.8023, "step": 990 }, { "epoch": 0.3377641445126108, "grad_norm": 48.91309212932905, "learning_rate": 9.995328406090899e-06, "loss": 3.3713, "step": 991 }, { "epoch": 0.338104976141786, "grad_norm": 19.734018733981276, "learning_rate": 9.995242309231568e-06, "loss": 3.48, "step": 992 }, { "epoch": 0.33844580777096117, "grad_norm": 27.019759588525194, "learning_rate": 9.995155426600738e-06, "loss": 3.5682, "step": 993 }, { "epoch": 0.3387866394001363, "grad_norm": 19.897336860144968, "learning_rate": 9.995067758212076e-06, "loss": 3.1228, "step": 994 }, { "epoch": 0.3391274710293115, "grad_norm": 20.203001419003115, "learning_rate": 9.99497930407937e-06, "loss": 3.4332, "step": 995 }, { "epoch": 0.3394683026584867, "grad_norm": 17.63602661928461, "learning_rate": 9.994890064216538e-06, "loss": 3.6235, "step": 996 }, { "epoch": 0.3398091342876619, "grad_norm": 31.562702523443928, "learning_rate": 9.994800038637612e-06, "loss": 3.1123, "step": 997 }, { "epoch": 0.3401499659168371, "grad_norm": 13.812592026331682, "learning_rate": 9.99470922735676e-06, "loss": 2.7344, "step": 998 }, { "epoch": 0.34049079754601225, "grad_norm": 18.92897380675409, "learning_rate": 9.994617630388261e-06, "loss": 2.6105, "step": 999 }, { "epoch": 0.34083162917518744, "grad_norm": 32.44286782331771, "learning_rate": 9.994525247746528e-06, "loss": 4.0815, "step": 1000 }, { "epoch": 0.34117246080436264, "grad_norm": 22.505353032860704, "learning_rate": 9.99443207944609e-06, "loss": 4.1015, "step": 1001 }, { "epoch": 0.34151329243353784, "grad_norm": 18.629999817064817, "learning_rate": 9.994338125501606e-06, "loss": 3.4006, "step": 1002 }, { "epoch": 0.34185412406271304, "grad_norm": 19.31263091644902, "learning_rate": 9.994243385927853e-06, "loss": 3.2789, "step": 1003 }, { "epoch": 0.3421949556918882, "grad_norm": 17.14956943836541, "learning_rate": 9.994147860739734e-06, "loss": 3.3262, "step": 1004 }, { "epoch": 0.3425357873210634, "grad_norm": 19.93820155464246, "learning_rate": 9.994051549952278e-06, "loss": 3.3257, "step": 1005 }, { "epoch": 0.3428766189502386, "grad_norm": 28.41769026316256, "learning_rate": 9.99395445358063e-06, "loss": 2.9444, "step": 1006 }, { "epoch": 0.3432174505794138, "grad_norm": 16.77933358036674, "learning_rate": 9.993856571640068e-06, "loss": 3.5764, "step": 1007 }, { "epoch": 0.34355828220858897, "grad_norm": 18.47257520337231, "learning_rate": 9.993757904145987e-06, "loss": 3.3175, "step": 1008 }, { "epoch": 0.34389911383776417, "grad_norm": 17.752840754367245, "learning_rate": 9.99365845111391e-06, "loss": 3.0823, "step": 1009 }, { "epoch": 0.3442399454669393, "grad_norm": 18.95202423672722, "learning_rate": 9.99355821255948e-06, "loss": 3.6742, "step": 1010 }, { "epoch": 0.3445807770961145, "grad_norm": 32.286025275265054, "learning_rate": 9.993457188498464e-06, "loss": 2.6442, "step": 1011 }, { "epoch": 0.3449216087252897, "grad_norm": 21.20956190627594, "learning_rate": 9.993355378946754e-06, "loss": 3.069, "step": 1012 }, { "epoch": 0.3452624403544649, "grad_norm": 25.048873055573395, "learning_rate": 9.993252783920367e-06, "loss": 3.5511, "step": 1013 }, { "epoch": 0.3456032719836401, "grad_norm": 14.639342274086404, "learning_rate": 9.99314940343544e-06, "loss": 3.1379, "step": 1014 }, { "epoch": 0.34594410361281525, "grad_norm": 13.795425339303037, "learning_rate": 9.993045237508234e-06, "loss": 2.9424, "step": 1015 }, { "epoch": 0.34628493524199044, "grad_norm": 21.74192949293576, "learning_rate": 9.992940286155138e-06, "loss": 3.9335, "step": 1016 }, { "epoch": 0.34662576687116564, "grad_norm": 25.38892144547374, "learning_rate": 9.992834549392656e-06, "loss": 3.3673, "step": 1017 }, { "epoch": 0.34696659850034084, "grad_norm": 19.772981590340688, "learning_rate": 9.992728027237427e-06, "loss": 2.8217, "step": 1018 }, { "epoch": 0.34730743012951604, "grad_norm": 29.92520567380896, "learning_rate": 9.992620719706204e-06, "loss": 3.7031, "step": 1019 }, { "epoch": 0.3476482617586912, "grad_norm": 13.75533107804516, "learning_rate": 9.992512626815866e-06, "loss": 2.8398, "step": 1020 }, { "epoch": 0.3479890933878664, "grad_norm": 24.141401577707605, "learning_rate": 9.992403748583418e-06, "loss": 3.4206, "step": 1021 }, { "epoch": 0.3483299250170416, "grad_norm": 26.208936344467155, "learning_rate": 9.992294085025988e-06, "loss": 3.9491, "step": 1022 }, { "epoch": 0.3486707566462168, "grad_norm": 26.052242924689345, "learning_rate": 9.992183636160823e-06, "loss": 3.1935, "step": 1023 }, { "epoch": 0.34901158827539197, "grad_norm": 20.918931925482756, "learning_rate": 9.992072402005302e-06, "loss": 2.7452, "step": 1024 }, { "epoch": 0.34935241990456717, "grad_norm": 17.18020871842603, "learning_rate": 9.991960382576919e-06, "loss": 2.9875, "step": 1025 }, { "epoch": 0.3496932515337423, "grad_norm": 21.298433274009927, "learning_rate": 9.991847577893295e-06, "loss": 3.9876, "step": 1026 }, { "epoch": 0.3500340831629175, "grad_norm": 24.12213225605878, "learning_rate": 9.991733987972177e-06, "loss": 3.6153, "step": 1027 }, { "epoch": 0.3503749147920927, "grad_norm": 16.416349390066298, "learning_rate": 9.99161961283143e-06, "loss": 3.0713, "step": 1028 }, { "epoch": 0.3507157464212679, "grad_norm": 22.960648603800205, "learning_rate": 9.99150445248905e-06, "loss": 3.1487, "step": 1029 }, { "epoch": 0.3510565780504431, "grad_norm": 16.85458561326823, "learning_rate": 9.991388506963147e-06, "loss": 3.1732, "step": 1030 }, { "epoch": 0.35139740967961824, "grad_norm": 23.96798306310886, "learning_rate": 9.991271776271962e-06, "loss": 3.7223, "step": 1031 }, { "epoch": 0.35173824130879344, "grad_norm": 22.993647256411176, "learning_rate": 9.99115426043386e-06, "loss": 3.5592, "step": 1032 }, { "epoch": 0.35207907293796864, "grad_norm": 17.488522252618193, "learning_rate": 9.991035959467322e-06, "loss": 3.0401, "step": 1033 }, { "epoch": 0.35241990456714384, "grad_norm": 19.23403241155621, "learning_rate": 9.99091687339096e-06, "loss": 3.666, "step": 1034 }, { "epoch": 0.35276073619631904, "grad_norm": 17.482386548088027, "learning_rate": 9.990797002223507e-06, "loss": 3.0659, "step": 1035 }, { "epoch": 0.35310156782549423, "grad_norm": 20.728426574282373, "learning_rate": 9.990676345983818e-06, "loss": 3.2289, "step": 1036 }, { "epoch": 0.3534423994546694, "grad_norm": 22.51524640075904, "learning_rate": 9.990554904690873e-06, "loss": 3.35, "step": 1037 }, { "epoch": 0.3537832310838446, "grad_norm": 19.724930831463066, "learning_rate": 9.990432678363776e-06, "loss": 3.6873, "step": 1038 }, { "epoch": 0.35412406271301977, "grad_norm": 14.408123350344335, "learning_rate": 9.990309667021752e-06, "loss": 3.4883, "step": 1039 }, { "epoch": 0.35446489434219497, "grad_norm": 28.788276423474827, "learning_rate": 9.990185870684152e-06, "loss": 4.1664, "step": 1040 }, { "epoch": 0.35480572597137017, "grad_norm": 15.51585339856858, "learning_rate": 9.990061289370449e-06, "loss": 3.2768, "step": 1041 }, { "epoch": 0.3551465576005453, "grad_norm": 19.502031202402918, "learning_rate": 9.989935923100243e-06, "loss": 3.0676, "step": 1042 }, { "epoch": 0.3554873892297205, "grad_norm": 16.767071111411063, "learning_rate": 9.989809771893251e-06, "loss": 3.0945, "step": 1043 }, { "epoch": 0.3558282208588957, "grad_norm": 22.446917422773613, "learning_rate": 9.98968283576932e-06, "loss": 3.2116, "step": 1044 }, { "epoch": 0.3561690524880709, "grad_norm": 23.20060235834063, "learning_rate": 9.989555114748413e-06, "loss": 2.9539, "step": 1045 }, { "epoch": 0.3565098841172461, "grad_norm": 16.578719503266097, "learning_rate": 9.989426608850626e-06, "loss": 3.4242, "step": 1046 }, { "epoch": 0.35685071574642124, "grad_norm": 22.287035591965946, "learning_rate": 9.989297318096172e-06, "loss": 3.1724, "step": 1047 }, { "epoch": 0.35719154737559644, "grad_norm": 19.32694437642856, "learning_rate": 9.98916724250539e-06, "loss": 3.409, "step": 1048 }, { "epoch": 0.35753237900477164, "grad_norm": 21.36577471081659, "learning_rate": 9.989036382098738e-06, "loss": 3.9806, "step": 1049 }, { "epoch": 0.35787321063394684, "grad_norm": 19.76611956750765, "learning_rate": 9.988904736896803e-06, "loss": 3.3786, "step": 1050 }, { "epoch": 0.35821404226312203, "grad_norm": 15.92363338018686, "learning_rate": 9.988772306920293e-06, "loss": 2.9802, "step": 1051 }, { "epoch": 0.35855487389229723, "grad_norm": 27.428430977679923, "learning_rate": 9.98863909219004e-06, "loss": 4.3364, "step": 1052 }, { "epoch": 0.3588957055214724, "grad_norm": 18.41003612845418, "learning_rate": 9.988505092727e-06, "loss": 3.0822, "step": 1053 }, { "epoch": 0.3592365371506476, "grad_norm": 19.71649056196144, "learning_rate": 9.988370308552251e-06, "loss": 3.2499, "step": 1054 }, { "epoch": 0.35957736877982277, "grad_norm": 23.964632173348072, "learning_rate": 9.988234739686995e-06, "loss": 3.7395, "step": 1055 }, { "epoch": 0.35991820040899797, "grad_norm": 16.992650836957864, "learning_rate": 9.988098386152557e-06, "loss": 2.8445, "step": 1056 }, { "epoch": 0.36025903203817317, "grad_norm": 25.05561620732673, "learning_rate": 9.987961247970386e-06, "loss": 3.0535, "step": 1057 }, { "epoch": 0.3605998636673483, "grad_norm": 20.454041507850672, "learning_rate": 9.987823325162056e-06, "loss": 3.2925, "step": 1058 }, { "epoch": 0.3609406952965235, "grad_norm": 22.82011166604307, "learning_rate": 9.98768461774926e-06, "loss": 3.0006, "step": 1059 }, { "epoch": 0.3612815269256987, "grad_norm": 27.941917653723237, "learning_rate": 9.987545125753818e-06, "loss": 3.9587, "step": 1060 }, { "epoch": 0.3616223585548739, "grad_norm": 19.601863530182797, "learning_rate": 9.987404849197677e-06, "loss": 3.2454, "step": 1061 }, { "epoch": 0.3619631901840491, "grad_norm": 13.508322420607161, "learning_rate": 9.987263788102897e-06, "loss": 3.0896, "step": 1062 }, { "epoch": 0.36230402181322424, "grad_norm": 16.586454953095092, "learning_rate": 9.98712194249167e-06, "loss": 3.906, "step": 1063 }, { "epoch": 0.36264485344239944, "grad_norm": 17.593708759555597, "learning_rate": 9.98697931238631e-06, "loss": 2.8176, "step": 1064 }, { "epoch": 0.36298568507157464, "grad_norm": 22.713781142112314, "learning_rate": 9.986835897809252e-06, "loss": 3.1157, "step": 1065 }, { "epoch": 0.36332651670074984, "grad_norm": 19.645757694658144, "learning_rate": 9.986691698783055e-06, "loss": 3.1626, "step": 1066 }, { "epoch": 0.36366734832992503, "grad_norm": 15.009012236390939, "learning_rate": 9.986546715330402e-06, "loss": 3.3271, "step": 1067 }, { "epoch": 0.36400817995910023, "grad_norm": 18.96198096082133, "learning_rate": 9.986400947474102e-06, "loss": 3.4214, "step": 1068 }, { "epoch": 0.3643490115882754, "grad_norm": 19.166306887422248, "learning_rate": 9.986254395237083e-06, "loss": 3.1294, "step": 1069 }, { "epoch": 0.36468984321745057, "grad_norm": 18.936331885375957, "learning_rate": 9.986107058642398e-06, "loss": 3.638, "step": 1070 }, { "epoch": 0.36503067484662577, "grad_norm": 18.86850166421172, "learning_rate": 9.985958937713226e-06, "loss": 3.5961, "step": 1071 }, { "epoch": 0.36537150647580097, "grad_norm": 17.558983996613158, "learning_rate": 9.985810032472862e-06, "loss": 3.3082, "step": 1072 }, { "epoch": 0.36571233810497616, "grad_norm": 20.467214295716197, "learning_rate": 9.985660342944735e-06, "loss": 3.6779, "step": 1073 }, { "epoch": 0.3660531697341513, "grad_norm": 33.3392009170554, "learning_rate": 9.985509869152388e-06, "loss": 3.5264, "step": 1074 }, { "epoch": 0.3663940013633265, "grad_norm": 23.43209530143601, "learning_rate": 9.985358611119494e-06, "loss": 3.1104, "step": 1075 }, { "epoch": 0.3667348329925017, "grad_norm": 19.71275364543631, "learning_rate": 9.985206568869843e-06, "loss": 3.2861, "step": 1076 }, { "epoch": 0.3670756646216769, "grad_norm": 27.445077004842894, "learning_rate": 9.985053742427355e-06, "loss": 3.3212, "step": 1077 }, { "epoch": 0.3674164962508521, "grad_norm": 20.625561695016522, "learning_rate": 9.984900131816068e-06, "loss": 3.8432, "step": 1078 }, { "epoch": 0.36775732788002724, "grad_norm": 18.33900375240487, "learning_rate": 9.984745737060147e-06, "loss": 3.4131, "step": 1079 }, { "epoch": 0.36809815950920244, "grad_norm": 34.10523488220508, "learning_rate": 9.984590558183879e-06, "loss": 3.0082, "step": 1080 }, { "epoch": 0.36843899113837764, "grad_norm": 23.47722378492024, "learning_rate": 9.984434595211673e-06, "loss": 3.5834, "step": 1081 }, { "epoch": 0.36877982276755283, "grad_norm": 19.92961329084212, "learning_rate": 9.984277848168062e-06, "loss": 2.7864, "step": 1082 }, { "epoch": 0.36912065439672803, "grad_norm": 18.068075194691808, "learning_rate": 9.984120317077707e-06, "loss": 3.014, "step": 1083 }, { "epoch": 0.36946148602590323, "grad_norm": 43.0074893297257, "learning_rate": 9.983962001965383e-06, "loss": 3.4474, "step": 1084 }, { "epoch": 0.36980231765507837, "grad_norm": 18.353886047426464, "learning_rate": 9.983802902855998e-06, "loss": 3.0148, "step": 1085 }, { "epoch": 0.37014314928425357, "grad_norm": 19.82067375153483, "learning_rate": 9.983643019774577e-06, "loss": 3.4038, "step": 1086 }, { "epoch": 0.37048398091342877, "grad_norm": 14.983948240272273, "learning_rate": 9.983482352746269e-06, "loss": 3.4074, "step": 1087 }, { "epoch": 0.37082481254260397, "grad_norm": 36.498563058136014, "learning_rate": 9.983320901796349e-06, "loss": 3.5127, "step": 1088 }, { "epoch": 0.37116564417177916, "grad_norm": 20.19239493846256, "learning_rate": 9.983158666950215e-06, "loss": 3.1834, "step": 1089 }, { "epoch": 0.3715064758009543, "grad_norm": 17.543431272890693, "learning_rate": 9.982995648233385e-06, "loss": 3.2767, "step": 1090 }, { "epoch": 0.3718473074301295, "grad_norm": 15.76336839049142, "learning_rate": 9.982831845671503e-06, "loss": 3.0666, "step": 1091 }, { "epoch": 0.3721881390593047, "grad_norm": 18.608142862155752, "learning_rate": 9.982667259290336e-06, "loss": 3.4996, "step": 1092 }, { "epoch": 0.3725289706884799, "grad_norm": 15.082139354553401, "learning_rate": 9.982501889115774e-06, "loss": 3.0387, "step": 1093 }, { "epoch": 0.3728698023176551, "grad_norm": 20.769727737871346, "learning_rate": 9.982335735173832e-06, "loss": 3.4275, "step": 1094 }, { "epoch": 0.37321063394683024, "grad_norm": 20.48396914150774, "learning_rate": 9.982168797490644e-06, "loss": 3.883, "step": 1095 }, { "epoch": 0.37355146557600544, "grad_norm": 19.425064877229985, "learning_rate": 9.982001076092471e-06, "loss": 3.1877, "step": 1096 }, { "epoch": 0.37389229720518063, "grad_norm": 16.570002116038122, "learning_rate": 9.981832571005698e-06, "loss": 3.4833, "step": 1097 }, { "epoch": 0.37423312883435583, "grad_norm": 16.114235220459534, "learning_rate": 9.98166328225683e-06, "loss": 3.4179, "step": 1098 }, { "epoch": 0.37457396046353103, "grad_norm": 25.08823715116121, "learning_rate": 9.981493209872498e-06, "loss": 3.8198, "step": 1099 }, { "epoch": 0.37491479209270623, "grad_norm": 16.35297073757911, "learning_rate": 9.98132235387945e-06, "loss": 3.023, "step": 1100 }, { "epoch": 0.37525562372188137, "grad_norm": 22.283598192950166, "learning_rate": 9.98115071430457e-06, "loss": 3.5575, "step": 1101 }, { "epoch": 0.37559645535105657, "grad_norm": 29.156705265778815, "learning_rate": 9.980978291174851e-06, "loss": 3.362, "step": 1102 }, { "epoch": 0.37593728698023177, "grad_norm": 24.999208422625884, "learning_rate": 9.98080508451742e-06, "loss": 3.1936, "step": 1103 }, { "epoch": 0.37627811860940696, "grad_norm": 46.42766859682043, "learning_rate": 9.980631094359523e-06, "loss": 3.7643, "step": 1104 }, { "epoch": 0.37661895023858216, "grad_norm": 19.306817013925766, "learning_rate": 9.980456320728526e-06, "loss": 3.0389, "step": 1105 }, { "epoch": 0.3769597818677573, "grad_norm": 20.47327378173784, "learning_rate": 9.980280763651925e-06, "loss": 3.407, "step": 1106 }, { "epoch": 0.3773006134969325, "grad_norm": 27.978346320783228, "learning_rate": 9.980104423157334e-06, "loss": 3.1507, "step": 1107 }, { "epoch": 0.3776414451261077, "grad_norm": 18.358841119529536, "learning_rate": 9.979927299272493e-06, "loss": 3.3895, "step": 1108 }, { "epoch": 0.3779822767552829, "grad_norm": 17.326984252146055, "learning_rate": 9.979749392025264e-06, "loss": 3.1683, "step": 1109 }, { "epoch": 0.3783231083844581, "grad_norm": 18.364077076491938, "learning_rate": 9.97957070144363e-06, "loss": 3.653, "step": 1110 }, { "epoch": 0.37866394001363324, "grad_norm": 19.982112407445076, "learning_rate": 9.979391227555706e-06, "loss": 4.0101, "step": 1111 }, { "epoch": 0.37900477164280844, "grad_norm": 26.75860803747121, "learning_rate": 9.979210970389718e-06, "loss": 3.4242, "step": 1112 }, { "epoch": 0.37934560327198363, "grad_norm": 42.1185977821696, "learning_rate": 9.979029929974022e-06, "loss": 3.1769, "step": 1113 }, { "epoch": 0.37968643490115883, "grad_norm": 20.095407942551564, "learning_rate": 9.9788481063371e-06, "loss": 3.0247, "step": 1114 }, { "epoch": 0.38002726653033403, "grad_norm": 35.16746100125832, "learning_rate": 9.97866549950755e-06, "loss": 3.4061, "step": 1115 }, { "epoch": 0.3803680981595092, "grad_norm": 18.887572618894907, "learning_rate": 9.978482109514098e-06, "loss": 3.5824, "step": 1116 }, { "epoch": 0.38070892978868437, "grad_norm": 16.1929320972325, "learning_rate": 9.978297936385593e-06, "loss": 3.2194, "step": 1117 }, { "epoch": 0.38104976141785957, "grad_norm": 24.367166620844337, "learning_rate": 9.978112980151005e-06, "loss": 3.2417, "step": 1118 }, { "epoch": 0.38139059304703476, "grad_norm": 18.627421366288406, "learning_rate": 9.977927240839428e-06, "loss": 2.8761, "step": 1119 }, { "epoch": 0.38173142467620996, "grad_norm": 24.181655497000293, "learning_rate": 9.97774071848008e-06, "loss": 4.2616, "step": 1120 }, { "epoch": 0.38207225630538516, "grad_norm": 24.902860714299575, "learning_rate": 9.9775534131023e-06, "loss": 3.7441, "step": 1121 }, { "epoch": 0.3824130879345603, "grad_norm": 15.899223906028, "learning_rate": 9.977365324735557e-06, "loss": 2.8298, "step": 1122 }, { "epoch": 0.3827539195637355, "grad_norm": 35.296482765797435, "learning_rate": 9.977176453409433e-06, "loss": 3.0591, "step": 1123 }, { "epoch": 0.3830947511929107, "grad_norm": 46.64766714505639, "learning_rate": 9.97698679915364e-06, "loss": 3.0709, "step": 1124 }, { "epoch": 0.3834355828220859, "grad_norm": 16.297870507727698, "learning_rate": 9.976796361998012e-06, "loss": 3.1372, "step": 1125 }, { "epoch": 0.3837764144512611, "grad_norm": 16.298386037359844, "learning_rate": 9.976605141972502e-06, "loss": 3.4797, "step": 1126 }, { "epoch": 0.3841172460804363, "grad_norm": 22.4298983907977, "learning_rate": 9.976413139107195e-06, "loss": 3.3933, "step": 1127 }, { "epoch": 0.38445807770961143, "grad_norm": 14.223846316844636, "learning_rate": 9.97622035343229e-06, "loss": 3.0967, "step": 1128 }, { "epoch": 0.38479890933878663, "grad_norm": 13.061440596225498, "learning_rate": 9.976026784978116e-06, "loss": 2.8874, "step": 1129 }, { "epoch": 0.38513974096796183, "grad_norm": 22.189187265980017, "learning_rate": 9.97583243377512e-06, "loss": 3.0119, "step": 1130 }, { "epoch": 0.38548057259713703, "grad_norm": 21.628301764070514, "learning_rate": 9.975637299853872e-06, "loss": 3.3413, "step": 1131 }, { "epoch": 0.3858214042263122, "grad_norm": 16.888657742961943, "learning_rate": 9.975441383245073e-06, "loss": 3.1451, "step": 1132 }, { "epoch": 0.38616223585548737, "grad_norm": 14.914431617443563, "learning_rate": 9.975244683979536e-06, "loss": 3.0768, "step": 1133 }, { "epoch": 0.38650306748466257, "grad_norm": 23.70323853397031, "learning_rate": 9.975047202088206e-06, "loss": 2.8694, "step": 1134 }, { "epoch": 0.38684389911383776, "grad_norm": 17.137650281738782, "learning_rate": 9.974848937602146e-06, "loss": 3.8108, "step": 1135 }, { "epoch": 0.38718473074301296, "grad_norm": 16.805433532685782, "learning_rate": 9.974649890552546e-06, "loss": 3.1146, "step": 1136 }, { "epoch": 0.38752556237218816, "grad_norm": 20.51841487254913, "learning_rate": 9.974450060970715e-06, "loss": 3.8989, "step": 1137 }, { "epoch": 0.3878663940013633, "grad_norm": 15.191953214370274, "learning_rate": 9.974249448888084e-06, "loss": 3.4339, "step": 1138 }, { "epoch": 0.3882072256305385, "grad_norm": 23.729340425477773, "learning_rate": 9.974048054336218e-06, "loss": 3.3732, "step": 1139 }, { "epoch": 0.3885480572597137, "grad_norm": 23.53579880755216, "learning_rate": 9.97384587734679e-06, "loss": 3.2826, "step": 1140 }, { "epoch": 0.3888888888888889, "grad_norm": 27.01249958206964, "learning_rate": 9.973642917951604e-06, "loss": 3.4409, "step": 1141 }, { "epoch": 0.3892297205180641, "grad_norm": 16.932066428977826, "learning_rate": 9.97343917618259e-06, "loss": 3.154, "step": 1142 }, { "epoch": 0.3895705521472393, "grad_norm": 17.046706001021196, "learning_rate": 9.973234652071797e-06, "loss": 3.1687, "step": 1143 }, { "epoch": 0.38991138377641443, "grad_norm": 14.264721413914925, "learning_rate": 9.973029345651395e-06, "loss": 2.7162, "step": 1144 }, { "epoch": 0.39025221540558963, "grad_norm": 35.9278860429445, "learning_rate": 9.972823256953679e-06, "loss": 2.7275, "step": 1145 }, { "epoch": 0.39059304703476483, "grad_norm": 18.266933682048528, "learning_rate": 9.97261638601107e-06, "loss": 3.4257, "step": 1146 }, { "epoch": 0.39093387866394, "grad_norm": 15.886323661873147, "learning_rate": 9.97240873285611e-06, "loss": 3.4741, "step": 1147 }, { "epoch": 0.3912747102931152, "grad_norm": 25.055164552576443, "learning_rate": 9.972200297521459e-06, "loss": 3.3787, "step": 1148 }, { "epoch": 0.39161554192229037, "grad_norm": 37.54278055158264, "learning_rate": 9.971991080039912e-06, "loss": 3.7619, "step": 1149 }, { "epoch": 0.39195637355146556, "grad_norm": 23.64788019628668, "learning_rate": 9.971781080444372e-06, "loss": 2.8984, "step": 1150 }, { "epoch": 0.39229720518064076, "grad_norm": 15.349091148105563, "learning_rate": 9.97157029876788e-06, "loss": 3.3605, "step": 1151 }, { "epoch": 0.39263803680981596, "grad_norm": 24.62082065806219, "learning_rate": 9.971358735043586e-06, "loss": 3.7627, "step": 1152 }, { "epoch": 0.39297886843899116, "grad_norm": 19.19083650158772, "learning_rate": 9.971146389304775e-06, "loss": 3.2797, "step": 1153 }, { "epoch": 0.3933197000681663, "grad_norm": 25.65918065270858, "learning_rate": 9.970933261584848e-06, "loss": 3.3647, "step": 1154 }, { "epoch": 0.3936605316973415, "grad_norm": 26.15407115935983, "learning_rate": 9.97071935191733e-06, "loss": 3.6857, "step": 1155 }, { "epoch": 0.3940013633265167, "grad_norm": 26.932113974397037, "learning_rate": 9.97050466033587e-06, "loss": 3.0976, "step": 1156 }, { "epoch": 0.3943421949556919, "grad_norm": 19.21007095761264, "learning_rate": 9.970289186874242e-06, "loss": 2.8935, "step": 1157 }, { "epoch": 0.3946830265848671, "grad_norm": 17.625942220545294, "learning_rate": 9.970072931566339e-06, "loss": 2.8462, "step": 1158 }, { "epoch": 0.3950238582140423, "grad_norm": 29.5502537398197, "learning_rate": 9.96985589444618e-06, "loss": 3.1347, "step": 1159 }, { "epoch": 0.39536468984321743, "grad_norm": 23.923542769331835, "learning_rate": 9.969638075547904e-06, "loss": 3.9263, "step": 1160 }, { "epoch": 0.39570552147239263, "grad_norm": 25.52929156483782, "learning_rate": 9.969419474905775e-06, "loss": 3.4313, "step": 1161 }, { "epoch": 0.3960463531015678, "grad_norm": 19.947560843630296, "learning_rate": 9.969200092554182e-06, "loss": 3.5263, "step": 1162 }, { "epoch": 0.396387184730743, "grad_norm": 24.390865720884936, "learning_rate": 9.968979928527632e-06, "loss": 3.9256, "step": 1163 }, { "epoch": 0.3967280163599182, "grad_norm": 17.451106354969276, "learning_rate": 9.96875898286076e-06, "loss": 3.0229, "step": 1164 }, { "epoch": 0.39706884798909337, "grad_norm": 20.588576688527375, "learning_rate": 9.96853725558832e-06, "loss": 3.699, "step": 1165 }, { "epoch": 0.39740967961826856, "grad_norm": 29.712170549512468, "learning_rate": 9.96831474674519e-06, "loss": 3.8406, "step": 1166 }, { "epoch": 0.39775051124744376, "grad_norm": 16.2421190833622, "learning_rate": 9.968091456366374e-06, "loss": 2.9316, "step": 1167 }, { "epoch": 0.39809134287661896, "grad_norm": 29.979478379399776, "learning_rate": 9.967867384486994e-06, "loss": 3.6148, "step": 1168 }, { "epoch": 0.39843217450579416, "grad_norm": 15.006749803392585, "learning_rate": 9.967642531142298e-06, "loss": 3.3437, "step": 1169 }, { "epoch": 0.3987730061349693, "grad_norm": 17.17188861440722, "learning_rate": 9.967416896367659e-06, "loss": 3.1986, "step": 1170 }, { "epoch": 0.3991138377641445, "grad_norm": 15.382429578297627, "learning_rate": 9.967190480198568e-06, "loss": 3.0635, "step": 1171 }, { "epoch": 0.3994546693933197, "grad_norm": 21.75481244313121, "learning_rate": 9.96696328267064e-06, "loss": 3.1002, "step": 1172 }, { "epoch": 0.3997955010224949, "grad_norm": 21.65600401874912, "learning_rate": 9.966735303819615e-06, "loss": 3.4696, "step": 1173 }, { "epoch": 0.4001363326516701, "grad_norm": 26.20538907480564, "learning_rate": 9.966506543681355e-06, "loss": 3.6298, "step": 1174 }, { "epoch": 0.4004771642808453, "grad_norm": 15.384027607111575, "learning_rate": 9.966277002291846e-06, "loss": 3.1741, "step": 1175 }, { "epoch": 0.40081799591002043, "grad_norm": 21.575583361437022, "learning_rate": 9.966046679687196e-06, "loss": 3.4558, "step": 1176 }, { "epoch": 0.40115882753919563, "grad_norm": 14.93224192590806, "learning_rate": 9.965815575903633e-06, "loss": 3.1491, "step": 1177 }, { "epoch": 0.4014996591683708, "grad_norm": 18.266480124060873, "learning_rate": 9.965583690977513e-06, "loss": 3.271, "step": 1178 }, { "epoch": 0.401840490797546, "grad_norm": 18.15014878812278, "learning_rate": 9.96535102494531e-06, "loss": 3.4557, "step": 1179 }, { "epoch": 0.4021813224267212, "grad_norm": 14.78591868993229, "learning_rate": 9.965117577843627e-06, "loss": 3.5044, "step": 1180 }, { "epoch": 0.40252215405589636, "grad_norm": 18.006239556464017, "learning_rate": 9.964883349709182e-06, "loss": 3.432, "step": 1181 }, { "epoch": 0.40286298568507156, "grad_norm": 15.5006342128743, "learning_rate": 9.964648340578825e-06, "loss": 3.2959, "step": 1182 }, { "epoch": 0.40320381731424676, "grad_norm": 22.29590921288553, "learning_rate": 9.964412550489518e-06, "loss": 2.9379, "step": 1183 }, { "epoch": 0.40354464894342196, "grad_norm": 14.666222181379753, "learning_rate": 9.964175979478356e-06, "loss": 3.3488, "step": 1184 }, { "epoch": 0.40388548057259716, "grad_norm": 27.572666016295813, "learning_rate": 9.96393862758255e-06, "loss": 3.4957, "step": 1185 }, { "epoch": 0.4042263122017723, "grad_norm": 26.041482139458992, "learning_rate": 9.963700494839439e-06, "loss": 3.5245, "step": 1186 }, { "epoch": 0.4045671438309475, "grad_norm": 10.721662597377698, "learning_rate": 9.963461581286479e-06, "loss": 2.7889, "step": 1187 }, { "epoch": 0.4049079754601227, "grad_norm": 21.171672107576963, "learning_rate": 9.963221886961256e-06, "loss": 3.3197, "step": 1188 }, { "epoch": 0.4052488070892979, "grad_norm": 16.45736871802216, "learning_rate": 9.962981411901472e-06, "loss": 2.6047, "step": 1189 }, { "epoch": 0.4055896387184731, "grad_norm": 18.754869212454427, "learning_rate": 9.962740156144955e-06, "loss": 3.74, "step": 1190 }, { "epoch": 0.4059304703476483, "grad_norm": 19.258123340232142, "learning_rate": 9.962498119729657e-06, "loss": 3.4674, "step": 1191 }, { "epoch": 0.40627130197682343, "grad_norm": 22.752948788345012, "learning_rate": 9.962255302693649e-06, "loss": 3.315, "step": 1192 }, { "epoch": 0.4066121336059986, "grad_norm": 15.21069862633653, "learning_rate": 9.962011705075129e-06, "loss": 3.1081, "step": 1193 }, { "epoch": 0.4069529652351738, "grad_norm": 23.76549426568509, "learning_rate": 9.961767326912415e-06, "loss": 3.1769, "step": 1194 }, { "epoch": 0.407293796864349, "grad_norm": 29.168571396164477, "learning_rate": 9.961522168243948e-06, "loss": 3.4966, "step": 1195 }, { "epoch": 0.4076346284935242, "grad_norm": 19.822056281598023, "learning_rate": 9.961276229108294e-06, "loss": 3.4679, "step": 1196 }, { "epoch": 0.40797546012269936, "grad_norm": 19.875313755070206, "learning_rate": 9.96102950954414e-06, "loss": 3.2736, "step": 1197 }, { "epoch": 0.40831629175187456, "grad_norm": 17.07387588528351, "learning_rate": 9.960782009590294e-06, "loss": 3.2129, "step": 1198 }, { "epoch": 0.40865712338104976, "grad_norm": 42.11518279910929, "learning_rate": 9.96053372928569e-06, "loss": 3.3457, "step": 1199 }, { "epoch": 0.40899795501022496, "grad_norm": 29.04058917696115, "learning_rate": 9.960284668669386e-06, "loss": 3.9032, "step": 1200 }, { "epoch": 0.40933878663940015, "grad_norm": 18.127108684787718, "learning_rate": 9.960034827780557e-06, "loss": 3.0157, "step": 1201 }, { "epoch": 0.4096796182685753, "grad_norm": 27.258651527999735, "learning_rate": 9.959784206658502e-06, "loss": 3.168, "step": 1202 }, { "epoch": 0.4100204498977505, "grad_norm": 18.857935641290723, "learning_rate": 9.95953280534265e-06, "loss": 3.3551, "step": 1203 }, { "epoch": 0.4103612815269257, "grad_norm": 34.289783103871564, "learning_rate": 9.959280623872546e-06, "loss": 3.1036, "step": 1204 }, { "epoch": 0.4107021131561009, "grad_norm": 24.660154376713834, "learning_rate": 9.959027662287856e-06, "loss": 3.571, "step": 1205 }, { "epoch": 0.4110429447852761, "grad_norm": 28.81366649594681, "learning_rate": 9.958773920628375e-06, "loss": 3.7016, "step": 1206 }, { "epoch": 0.4113837764144513, "grad_norm": 18.909304606844096, "learning_rate": 9.958519398934017e-06, "loss": 3.4561, "step": 1207 }, { "epoch": 0.4117246080436264, "grad_norm": 31.51682313397142, "learning_rate": 9.95826409724482e-06, "loss": 3.748, "step": 1208 }, { "epoch": 0.4120654396728016, "grad_norm": 17.73897980575181, "learning_rate": 9.95800801560094e-06, "loss": 3.4925, "step": 1209 }, { "epoch": 0.4124062713019768, "grad_norm": 30.239087492543515, "learning_rate": 9.957751154042663e-06, "loss": 3.6837, "step": 1210 }, { "epoch": 0.412747102931152, "grad_norm": 18.016705569432823, "learning_rate": 9.957493512610394e-06, "loss": 3.2729, "step": 1211 }, { "epoch": 0.4130879345603272, "grad_norm": 16.39644097297851, "learning_rate": 9.95723509134466e-06, "loss": 2.9552, "step": 1212 }, { "epoch": 0.41342876618950236, "grad_norm": 29.66237232711792, "learning_rate": 9.956975890286115e-06, "loss": 3.465, "step": 1213 }, { "epoch": 0.41376959781867756, "grad_norm": 14.694052660780379, "learning_rate": 9.956715909475528e-06, "loss": 3.0025, "step": 1214 }, { "epoch": 0.41411042944785276, "grad_norm": 24.375854368229295, "learning_rate": 9.956455148953797e-06, "loss": 3.6427, "step": 1215 }, { "epoch": 0.41445126107702795, "grad_norm": 17.762791502644774, "learning_rate": 9.956193608761941e-06, "loss": 3.5354, "step": 1216 }, { "epoch": 0.41479209270620315, "grad_norm": 20.639002739532813, "learning_rate": 9.955931288941101e-06, "loss": 3.4592, "step": 1217 }, { "epoch": 0.41513292433537835, "grad_norm": 15.908862691975889, "learning_rate": 9.95566818953254e-06, "loss": 3.1109, "step": 1218 }, { "epoch": 0.4154737559645535, "grad_norm": 15.752169728877808, "learning_rate": 9.955404310577648e-06, "loss": 3.2173, "step": 1219 }, { "epoch": 0.4158145875937287, "grad_norm": 28.653289453367275, "learning_rate": 9.955139652117929e-06, "loss": 3.6138, "step": 1220 }, { "epoch": 0.4161554192229039, "grad_norm": 24.058059072534068, "learning_rate": 9.954874214195018e-06, "loss": 3.6496, "step": 1221 }, { "epoch": 0.4164962508520791, "grad_norm": 19.41644537365908, "learning_rate": 9.954607996850669e-06, "loss": 4.0455, "step": 1222 }, { "epoch": 0.4168370824812543, "grad_norm": 16.583559865747322, "learning_rate": 9.95434100012676e-06, "loss": 3.4023, "step": 1223 }, { "epoch": 0.4171779141104294, "grad_norm": 34.37938987958627, "learning_rate": 9.954073224065288e-06, "loss": 3.2806, "step": 1224 }, { "epoch": 0.4175187457396046, "grad_norm": 20.956859628261356, "learning_rate": 9.953804668708379e-06, "loss": 3.123, "step": 1225 }, { "epoch": 0.4178595773687798, "grad_norm": 25.40207370172699, "learning_rate": 9.953535334098273e-06, "loss": 3.4604, "step": 1226 }, { "epoch": 0.418200408997955, "grad_norm": 19.580034035730385, "learning_rate": 9.953265220277341e-06, "loss": 3.3837, "step": 1227 }, { "epoch": 0.4185412406271302, "grad_norm": 20.274286627222697, "learning_rate": 9.952994327288072e-06, "loss": 3.4538, "step": 1228 }, { "epoch": 0.41888207225630536, "grad_norm": 21.000560022001828, "learning_rate": 9.952722655173079e-06, "loss": 3.4361, "step": 1229 }, { "epoch": 0.41922290388548056, "grad_norm": 16.163294430206808, "learning_rate": 9.952450203975097e-06, "loss": 3.0352, "step": 1230 }, { "epoch": 0.41956373551465576, "grad_norm": 12.96291220419135, "learning_rate": 9.952176973736983e-06, "loss": 2.5976, "step": 1231 }, { "epoch": 0.41990456714383095, "grad_norm": 20.173337170746663, "learning_rate": 9.951902964501717e-06, "loss": 3.7217, "step": 1232 }, { "epoch": 0.42024539877300615, "grad_norm": 14.254582149545545, "learning_rate": 9.951628176312403e-06, "loss": 3.1739, "step": 1233 }, { "epoch": 0.42058623040218135, "grad_norm": 20.32513125576033, "learning_rate": 9.951352609212264e-06, "loss": 3.3297, "step": 1234 }, { "epoch": 0.4209270620313565, "grad_norm": 13.410238802540723, "learning_rate": 9.95107626324465e-06, "loss": 3.1863, "step": 1235 }, { "epoch": 0.4212678936605317, "grad_norm": 14.14263142417603, "learning_rate": 9.950799138453033e-06, "loss": 3.0327, "step": 1236 }, { "epoch": 0.4216087252897069, "grad_norm": 25.265025431958858, "learning_rate": 9.950521234881001e-06, "loss": 3.0474, "step": 1237 }, { "epoch": 0.4219495569188821, "grad_norm": 24.27652017043735, "learning_rate": 9.950242552572272e-06, "loss": 3.2635, "step": 1238 }, { "epoch": 0.4222903885480573, "grad_norm": 12.159838230143585, "learning_rate": 9.949963091570684e-06, "loss": 2.6814, "step": 1239 }, { "epoch": 0.4226312201772324, "grad_norm": 16.303265990147615, "learning_rate": 9.949682851920198e-06, "loss": 3.4281, "step": 1240 }, { "epoch": 0.4229720518064076, "grad_norm": 13.423285503936983, "learning_rate": 9.949401833664896e-06, "loss": 2.7545, "step": 1241 }, { "epoch": 0.4233128834355828, "grad_norm": 12.987657885589574, "learning_rate": 9.949120036848983e-06, "loss": 2.7399, "step": 1242 }, { "epoch": 0.423653715064758, "grad_norm": 16.19256076803677, "learning_rate": 9.948837461516786e-06, "loss": 3.1973, "step": 1243 }, { "epoch": 0.4239945466939332, "grad_norm": 19.030230950402757, "learning_rate": 9.948554107712756e-06, "loss": 2.4848, "step": 1244 }, { "epoch": 0.42433537832310836, "grad_norm": 21.02031776470229, "learning_rate": 9.948269975481466e-06, "loss": 2.8758, "step": 1245 }, { "epoch": 0.42467620995228356, "grad_norm": 17.34334703785008, "learning_rate": 9.947985064867612e-06, "loss": 3.4508, "step": 1246 }, { "epoch": 0.42501704158145875, "grad_norm": 29.087033257442627, "learning_rate": 9.947699375916009e-06, "loss": 3.3941, "step": 1247 }, { "epoch": 0.42535787321063395, "grad_norm": 20.83494120974773, "learning_rate": 9.9474129086716e-06, "loss": 3.2492, "step": 1248 }, { "epoch": 0.42569870483980915, "grad_norm": 24.470379155611795, "learning_rate": 9.947125663179444e-06, "loss": 2.8744, "step": 1249 }, { "epoch": 0.42603953646898435, "grad_norm": 21.22617400484804, "learning_rate": 9.94683763948473e-06, "loss": 3.4913, "step": 1250 }, { "epoch": 0.4263803680981595, "grad_norm": 27.574729562831525, "learning_rate": 9.946548837632762e-06, "loss": 3.7022, "step": 1251 }, { "epoch": 0.4267211997273347, "grad_norm": 16.395532414861783, "learning_rate": 9.94625925766897e-06, "loss": 3.145, "step": 1252 }, { "epoch": 0.4270620313565099, "grad_norm": 21.46362741262201, "learning_rate": 9.945968899638908e-06, "loss": 3.705, "step": 1253 }, { "epoch": 0.4274028629856851, "grad_norm": 18.92734035964414, "learning_rate": 9.945677763588248e-06, "loss": 3.6316, "step": 1254 }, { "epoch": 0.4277436946148603, "grad_norm": 12.637277191977182, "learning_rate": 9.94538584956279e-06, "loss": 2.7837, "step": 1255 }, { "epoch": 0.4280845262440354, "grad_norm": 16.256621494279322, "learning_rate": 9.94509315760845e-06, "loss": 3.1757, "step": 1256 }, { "epoch": 0.4284253578732106, "grad_norm": 18.874930993252317, "learning_rate": 9.944799687771272e-06, "loss": 3.2242, "step": 1257 }, { "epoch": 0.4287661895023858, "grad_norm": 14.882314364715942, "learning_rate": 9.94450544009742e-06, "loss": 3.342, "step": 1258 }, { "epoch": 0.429107021131561, "grad_norm": 16.883415933766166, "learning_rate": 9.944210414633178e-06, "loss": 3.0105, "step": 1259 }, { "epoch": 0.4294478527607362, "grad_norm": 20.86418630286833, "learning_rate": 9.943914611424956e-06, "loss": 3.6061, "step": 1260 }, { "epoch": 0.42978868438991136, "grad_norm": 25.392180719892604, "learning_rate": 9.943618030519285e-06, "loss": 3.7127, "step": 1261 }, { "epoch": 0.43012951601908656, "grad_norm": 24.60540995441095, "learning_rate": 9.943320671962818e-06, "loss": 3.6651, "step": 1262 }, { "epoch": 0.43047034764826175, "grad_norm": 19.45498135661641, "learning_rate": 9.943022535802332e-06, "loss": 3.1034, "step": 1263 }, { "epoch": 0.43081117927743695, "grad_norm": 17.77692078217039, "learning_rate": 9.942723622084722e-06, "loss": 3.2344, "step": 1264 }, { "epoch": 0.43115201090661215, "grad_norm": 32.42464635095363, "learning_rate": 9.942423930857013e-06, "loss": 2.9709, "step": 1265 }, { "epoch": 0.43149284253578735, "grad_norm": 15.556021872160075, "learning_rate": 9.942123462166344e-06, "loss": 3.4175, "step": 1266 }, { "epoch": 0.4318336741649625, "grad_norm": 13.955911581250279, "learning_rate": 9.94182221605998e-06, "loss": 3.042, "step": 1267 }, { "epoch": 0.4321745057941377, "grad_norm": 16.86786396289203, "learning_rate": 9.941520192585312e-06, "loss": 3.3289, "step": 1268 }, { "epoch": 0.4325153374233129, "grad_norm": 22.964851697215803, "learning_rate": 9.941217391789844e-06, "loss": 3.7592, "step": 1269 }, { "epoch": 0.4328561690524881, "grad_norm": 24.517079363772993, "learning_rate": 9.940913813721212e-06, "loss": 3.5793, "step": 1270 }, { "epoch": 0.4331970006816633, "grad_norm": 18.78949794075681, "learning_rate": 9.940609458427166e-06, "loss": 3.7287, "step": 1271 }, { "epoch": 0.4335378323108384, "grad_norm": 21.565742124473232, "learning_rate": 9.940304325955588e-06, "loss": 3.5396, "step": 1272 }, { "epoch": 0.4338786639400136, "grad_norm": 19.680444183507277, "learning_rate": 9.93999841635447e-06, "loss": 2.7035, "step": 1273 }, { "epoch": 0.4342194955691888, "grad_norm": 22.108099453826366, "learning_rate": 9.93969172967194e-06, "loss": 3.1808, "step": 1274 }, { "epoch": 0.434560327198364, "grad_norm": 25.223980953538707, "learning_rate": 9.939384265956234e-06, "loss": 3.6471, "step": 1275 }, { "epoch": 0.4349011588275392, "grad_norm": 20.273502464218655, "learning_rate": 9.939076025255721e-06, "loss": 3.1385, "step": 1276 }, { "epoch": 0.43524199045671436, "grad_norm": 16.593079925893687, "learning_rate": 9.938767007618889e-06, "loss": 3.469, "step": 1277 }, { "epoch": 0.43558282208588955, "grad_norm": 23.33648361810804, "learning_rate": 9.938457213094345e-06, "loss": 3.266, "step": 1278 }, { "epoch": 0.43592365371506475, "grad_norm": 20.581816660634118, "learning_rate": 9.938146641730824e-06, "loss": 3.5982, "step": 1279 }, { "epoch": 0.43626448534423995, "grad_norm": 13.341607087665903, "learning_rate": 9.937835293577178e-06, "loss": 2.9119, "step": 1280 }, { "epoch": 0.43660531697341515, "grad_norm": 12.713325164074323, "learning_rate": 9.937523168682384e-06, "loss": 2.9269, "step": 1281 }, { "epoch": 0.43694614860259035, "grad_norm": 15.077218369847607, "learning_rate": 9.93721026709554e-06, "loss": 3.2999, "step": 1282 }, { "epoch": 0.4372869802317655, "grad_norm": 25.765249453743216, "learning_rate": 9.936896588865867e-06, "loss": 3.2603, "step": 1283 }, { "epoch": 0.4376278118609407, "grad_norm": 15.18273195464565, "learning_rate": 9.93658213404271e-06, "loss": 3.0309, "step": 1284 }, { "epoch": 0.4379686434901159, "grad_norm": 18.45777383135803, "learning_rate": 9.93626690267553e-06, "loss": 3.263, "step": 1285 }, { "epoch": 0.4383094751192911, "grad_norm": 15.052442524061176, "learning_rate": 9.935950894813916e-06, "loss": 3.4803, "step": 1286 }, { "epoch": 0.4386503067484663, "grad_norm": 24.46358566489536, "learning_rate": 9.935634110507577e-06, "loss": 3.5294, "step": 1287 }, { "epoch": 0.4389911383776414, "grad_norm": 15.401733581329813, "learning_rate": 9.935316549806347e-06, "loss": 3.159, "step": 1288 }, { "epoch": 0.4393319700068166, "grad_norm": 15.17432376586215, "learning_rate": 9.934998212760177e-06, "loss": 3.2565, "step": 1289 }, { "epoch": 0.4396728016359918, "grad_norm": 20.23005109547779, "learning_rate": 9.93467909941914e-06, "loss": 3.2633, "step": 1290 }, { "epoch": 0.440013633265167, "grad_norm": 17.34527047308293, "learning_rate": 9.93435920983344e-06, "loss": 3.2543, "step": 1291 }, { "epoch": 0.4403544648943422, "grad_norm": 23.654481789461396, "learning_rate": 9.934038544053393e-06, "loss": 3.4398, "step": 1292 }, { "epoch": 0.44069529652351735, "grad_norm": 17.30524358878623, "learning_rate": 9.933717102129442e-06, "loss": 3.568, "step": 1293 }, { "epoch": 0.44103612815269255, "grad_norm": 40.73849045675947, "learning_rate": 9.933394884112149e-06, "loss": 3.3131, "step": 1294 }, { "epoch": 0.44137695978186775, "grad_norm": 13.48233386384235, "learning_rate": 9.933071890052203e-06, "loss": 2.8618, "step": 1295 }, { "epoch": 0.44171779141104295, "grad_norm": 19.3739197822594, "learning_rate": 9.93274812000041e-06, "loss": 3.4381, "step": 1296 }, { "epoch": 0.44205862304021815, "grad_norm": 22.505236846709582, "learning_rate": 9.932423574007704e-06, "loss": 3.5259, "step": 1297 }, { "epoch": 0.44239945466939334, "grad_norm": 18.166863152816198, "learning_rate": 9.932098252125134e-06, "loss": 3.3791, "step": 1298 }, { "epoch": 0.4427402862985685, "grad_norm": 18.67870889470471, "learning_rate": 9.931772154403876e-06, "loss": 2.9438, "step": 1299 }, { "epoch": 0.4430811179277437, "grad_norm": 19.365169792685528, "learning_rate": 9.931445280895225e-06, "loss": 3.3447, "step": 1300 }, { "epoch": 0.4434219495569189, "grad_norm": 17.479787423043756, "learning_rate": 9.931117631650599e-06, "loss": 3.0086, "step": 1301 }, { "epoch": 0.4437627811860941, "grad_norm": 26.78678862461852, "learning_rate": 9.930789206721543e-06, "loss": 3.806, "step": 1302 }, { "epoch": 0.4441036128152693, "grad_norm": 17.294408071302772, "learning_rate": 9.930460006159715e-06, "loss": 3.4311, "step": 1303 }, { "epoch": 0.4444444444444444, "grad_norm": 25.566965330074048, "learning_rate": 9.9301300300169e-06, "loss": 3.4183, "step": 1304 }, { "epoch": 0.4447852760736196, "grad_norm": 18.541284454599168, "learning_rate": 9.929799278345006e-06, "loss": 3.278, "step": 1305 }, { "epoch": 0.4451261077027948, "grad_norm": 23.860202309718726, "learning_rate": 9.929467751196062e-06, "loss": 3.5605, "step": 1306 }, { "epoch": 0.44546693933197, "grad_norm": 29.168442049675523, "learning_rate": 9.929135448622219e-06, "loss": 3.9469, "step": 1307 }, { "epoch": 0.4458077709611452, "grad_norm": 17.205703915947037, "learning_rate": 9.928802370675746e-06, "loss": 3.0094, "step": 1308 }, { "epoch": 0.4461486025903204, "grad_norm": 26.886089682180632, "learning_rate": 9.92846851740904e-06, "loss": 3.5325, "step": 1309 }, { "epoch": 0.44648943421949555, "grad_norm": 20.16688695287021, "learning_rate": 9.928133888874617e-06, "loss": 3.2827, "step": 1310 }, { "epoch": 0.44683026584867075, "grad_norm": 25.08825652133083, "learning_rate": 9.927798485125116e-06, "loss": 3.3124, "step": 1311 }, { "epoch": 0.44717109747784595, "grad_norm": 15.701382375370306, "learning_rate": 9.927462306213297e-06, "loss": 3.428, "step": 1312 }, { "epoch": 0.44751192910702114, "grad_norm": 35.59945615178777, "learning_rate": 9.927125352192043e-06, "loss": 3.5128, "step": 1313 }, { "epoch": 0.44785276073619634, "grad_norm": 25.47516493333518, "learning_rate": 9.926787623114355e-06, "loss": 3.9966, "step": 1314 }, { "epoch": 0.4481935923653715, "grad_norm": 20.202065410919403, "learning_rate": 9.926449119033362e-06, "loss": 2.9776, "step": 1315 }, { "epoch": 0.4485344239945467, "grad_norm": 22.287644530407693, "learning_rate": 9.926109840002312e-06, "loss": 3.188, "step": 1316 }, { "epoch": 0.4488752556237219, "grad_norm": 18.11901462327546, "learning_rate": 9.925769786074576e-06, "loss": 3.6658, "step": 1317 }, { "epoch": 0.4492160872528971, "grad_norm": 21.4696781001905, "learning_rate": 9.925428957303642e-06, "loss": 3.4313, "step": 1318 }, { "epoch": 0.4495569188820723, "grad_norm": 19.117122410707882, "learning_rate": 9.925087353743128e-06, "loss": 3.5096, "step": 1319 }, { "epoch": 0.4498977505112474, "grad_norm": 15.662249673353935, "learning_rate": 9.924744975446764e-06, "loss": 3.1163, "step": 1320 }, { "epoch": 0.4502385821404226, "grad_norm": 21.035204487644233, "learning_rate": 9.924401822468415e-06, "loss": 3.1549, "step": 1321 }, { "epoch": 0.4505794137695978, "grad_norm": 34.8105610965106, "learning_rate": 9.924057894862056e-06, "loss": 3.8211, "step": 1322 }, { "epoch": 0.450920245398773, "grad_norm": 37.73434150267029, "learning_rate": 9.923713192681788e-06, "loss": 3.4141, "step": 1323 }, { "epoch": 0.4512610770279482, "grad_norm": 17.599889165856055, "learning_rate": 9.923367715981833e-06, "loss": 3.5111, "step": 1324 }, { "epoch": 0.4516019086571234, "grad_norm": 14.264125432332277, "learning_rate": 9.923021464816539e-06, "loss": 3.2514, "step": 1325 }, { "epoch": 0.45194274028629855, "grad_norm": 15.59071385296851, "learning_rate": 9.922674439240371e-06, "loss": 3.1986, "step": 1326 }, { "epoch": 0.45228357191547375, "grad_norm": 20.320557409623483, "learning_rate": 9.922326639307918e-06, "loss": 3.5042, "step": 1327 }, { "epoch": 0.45262440354464895, "grad_norm": 22.541848392931826, "learning_rate": 9.92197806507389e-06, "loss": 3.2427, "step": 1328 }, { "epoch": 0.45296523517382414, "grad_norm": 17.28874126222633, "learning_rate": 9.921628716593117e-06, "loss": 3.4786, "step": 1329 }, { "epoch": 0.45330606680299934, "grad_norm": 19.401473749708646, "learning_rate": 9.921278593920559e-06, "loss": 3.0076, "step": 1330 }, { "epoch": 0.4536468984321745, "grad_norm": 17.413771332026283, "learning_rate": 9.920927697111284e-06, "loss": 2.9912, "step": 1331 }, { "epoch": 0.4539877300613497, "grad_norm": 25.359905424252783, "learning_rate": 9.920576026220495e-06, "loss": 2.7707, "step": 1332 }, { "epoch": 0.4543285616905249, "grad_norm": 13.150117482106511, "learning_rate": 9.920223581303507e-06, "loss": 3.0638, "step": 1333 }, { "epoch": 0.4546693933197001, "grad_norm": 24.601313577883428, "learning_rate": 9.919870362415766e-06, "loss": 3.2811, "step": 1334 }, { "epoch": 0.4550102249488753, "grad_norm": 15.748082223093167, "learning_rate": 9.919516369612831e-06, "loss": 3.1356, "step": 1335 }, { "epoch": 0.4553510565780504, "grad_norm": 16.528480071282623, "learning_rate": 9.91916160295039e-06, "loss": 3.227, "step": 1336 }, { "epoch": 0.4556918882072256, "grad_norm": 22.250393507997394, "learning_rate": 9.918806062484244e-06, "loss": 3.7354, "step": 1337 }, { "epoch": 0.4560327198364008, "grad_norm": 21.78567235559386, "learning_rate": 9.918449748270324e-06, "loss": 3.6751, "step": 1338 }, { "epoch": 0.456373551465576, "grad_norm": 30.451089582685576, "learning_rate": 9.918092660364679e-06, "loss": 3.7318, "step": 1339 }, { "epoch": 0.4567143830947512, "grad_norm": 17.949110567841906, "learning_rate": 9.917734798823484e-06, "loss": 3.3207, "step": 1340 }, { "epoch": 0.4570552147239264, "grad_norm": 16.53405692982974, "learning_rate": 9.917376163703026e-06, "loss": 3.3129, "step": 1341 }, { "epoch": 0.45739604635310155, "grad_norm": 16.807318821262047, "learning_rate": 9.917016755059723e-06, "loss": 2.7985, "step": 1342 }, { "epoch": 0.45773687798227675, "grad_norm": 22.112568998286715, "learning_rate": 9.91665657295011e-06, "loss": 3.3293, "step": 1343 }, { "epoch": 0.45807770961145194, "grad_norm": 14.48657604825247, "learning_rate": 9.916295617430848e-06, "loss": 3.4248, "step": 1344 }, { "epoch": 0.45841854124062714, "grad_norm": 67.40001023944164, "learning_rate": 9.915933888558713e-06, "loss": 3.4829, "step": 1345 }, { "epoch": 0.45875937286980234, "grad_norm": 26.792792050176892, "learning_rate": 9.915571386390609e-06, "loss": 3.1143, "step": 1346 }, { "epoch": 0.4591002044989775, "grad_norm": 28.15838079189428, "learning_rate": 9.915208110983559e-06, "loss": 3.9373, "step": 1347 }, { "epoch": 0.4594410361281527, "grad_norm": 20.727032006927367, "learning_rate": 9.914844062394704e-06, "loss": 3.2286, "step": 1348 }, { "epoch": 0.4597818677573279, "grad_norm": 15.113211676238658, "learning_rate": 9.914479240681316e-06, "loss": 3.2959, "step": 1349 }, { "epoch": 0.4601226993865031, "grad_norm": 21.705596314464017, "learning_rate": 9.914113645900779e-06, "loss": 3.7463, "step": 1350 }, { "epoch": 0.4604635310156783, "grad_norm": 21.084638991505766, "learning_rate": 9.913747278110603e-06, "loss": 3.5771, "step": 1351 }, { "epoch": 0.4608043626448534, "grad_norm": 23.957372807547685, "learning_rate": 9.91338013736842e-06, "loss": 3.727, "step": 1352 }, { "epoch": 0.4611451942740286, "grad_norm": 39.556333682076506, "learning_rate": 9.913012223731982e-06, "loss": 3.1418, "step": 1353 }, { "epoch": 0.4614860259032038, "grad_norm": 22.205602204419737, "learning_rate": 9.912643537259165e-06, "loss": 3.2436, "step": 1354 }, { "epoch": 0.461826857532379, "grad_norm": 21.36507621385849, "learning_rate": 9.912274078007962e-06, "loss": 3.1771, "step": 1355 }, { "epoch": 0.4621676891615542, "grad_norm": 20.060217181849783, "learning_rate": 9.911903846036493e-06, "loss": 3.5251, "step": 1356 }, { "epoch": 0.4625085207907294, "grad_norm": 25.521153560730564, "learning_rate": 9.911532841402996e-06, "loss": 3.7046, "step": 1357 }, { "epoch": 0.46284935241990455, "grad_norm": 31.489152081960793, "learning_rate": 9.91116106416583e-06, "loss": 3.7034, "step": 1358 }, { "epoch": 0.46319018404907975, "grad_norm": 17.914329065923788, "learning_rate": 9.910788514383479e-06, "loss": 3.1938, "step": 1359 }, { "epoch": 0.46353101567825494, "grad_norm": 21.32971450454713, "learning_rate": 9.910415192114547e-06, "loss": 3.4244, "step": 1360 }, { "epoch": 0.46387184730743014, "grad_norm": 22.42196832689618, "learning_rate": 9.910041097417756e-06, "loss": 3.7876, "step": 1361 }, { "epoch": 0.46421267893660534, "grad_norm": 26.009610911776434, "learning_rate": 9.909666230351955e-06, "loss": 3.48, "step": 1362 }, { "epoch": 0.4645535105657805, "grad_norm": 15.387337542674189, "learning_rate": 9.909290590976113e-06, "loss": 3.1548, "step": 1363 }, { "epoch": 0.4648943421949557, "grad_norm": 25.984563999303468, "learning_rate": 9.908914179349319e-06, "loss": 3.3753, "step": 1364 }, { "epoch": 0.4652351738241309, "grad_norm": 18.073034947004505, "learning_rate": 9.908536995530783e-06, "loss": 3.6673, "step": 1365 }, { "epoch": 0.4655760054533061, "grad_norm": 16.377488868717066, "learning_rate": 9.908159039579836e-06, "loss": 3.3102, "step": 1366 }, { "epoch": 0.46591683708248127, "grad_norm": 25.923084902838944, "learning_rate": 9.907780311555935e-06, "loss": 3.3404, "step": 1367 }, { "epoch": 0.4662576687116564, "grad_norm": 18.955340516919737, "learning_rate": 9.907400811518653e-06, "loss": 3.5062, "step": 1368 }, { "epoch": 0.4665985003408316, "grad_norm": 17.810659979917745, "learning_rate": 9.90702053952769e-06, "loss": 2.989, "step": 1369 }, { "epoch": 0.4669393319700068, "grad_norm": 19.09820863891688, "learning_rate": 9.90663949564286e-06, "loss": 3.4141, "step": 1370 }, { "epoch": 0.467280163599182, "grad_norm": 27.99861527734652, "learning_rate": 9.906257679924107e-06, "loss": 4.0354, "step": 1371 }, { "epoch": 0.4676209952283572, "grad_norm": 21.84215755578167, "learning_rate": 9.90587509243149e-06, "loss": 3.8505, "step": 1372 }, { "epoch": 0.4679618268575324, "grad_norm": 22.90212309683293, "learning_rate": 9.905491733225193e-06, "loss": 3.3077, "step": 1373 }, { "epoch": 0.46830265848670755, "grad_norm": 26.84186688749521, "learning_rate": 9.905107602365517e-06, "loss": 3.4043, "step": 1374 }, { "epoch": 0.46864349011588274, "grad_norm": 20.19973158812703, "learning_rate": 9.90472269991289e-06, "loss": 3.4343, "step": 1375 }, { "epoch": 0.46898432174505794, "grad_norm": 20.831814005167, "learning_rate": 9.904337025927857e-06, "loss": 3.6319, "step": 1376 }, { "epoch": 0.46932515337423314, "grad_norm": 13.616022184031408, "learning_rate": 9.903950580471086e-06, "loss": 3.4311, "step": 1377 }, { "epoch": 0.46966598500340834, "grad_norm": 11.477658281098835, "learning_rate": 9.903563363603368e-06, "loss": 3.325, "step": 1378 }, { "epoch": 0.4700068166325835, "grad_norm": 23.36313277614423, "learning_rate": 9.903175375385614e-06, "loss": 4.231, "step": 1379 }, { "epoch": 0.4703476482617587, "grad_norm": 20.87664480952871, "learning_rate": 9.902786615878855e-06, "loss": 3.9094, "step": 1380 }, { "epoch": 0.4706884798909339, "grad_norm": 18.599177192921395, "learning_rate": 9.902397085144244e-06, "loss": 3.1055, "step": 1381 }, { "epoch": 0.4710293115201091, "grad_norm": 24.296852677979636, "learning_rate": 9.902006783243054e-06, "loss": 3.104, "step": 1382 }, { "epoch": 0.47137014314928427, "grad_norm": 19.774954923500378, "learning_rate": 9.901615710236685e-06, "loss": 3.022, "step": 1383 }, { "epoch": 0.47171097477845947, "grad_norm": 28.611433073881983, "learning_rate": 9.901223866186653e-06, "loss": 3.4924, "step": 1384 }, { "epoch": 0.4720518064076346, "grad_norm": 16.775051047976614, "learning_rate": 9.900831251154596e-06, "loss": 3.1574, "step": 1385 }, { "epoch": 0.4723926380368098, "grad_norm": 28.288287060215296, "learning_rate": 9.900437865202275e-06, "loss": 3.9469, "step": 1386 }, { "epoch": 0.472733469665985, "grad_norm": 15.315391439242383, "learning_rate": 9.90004370839157e-06, "loss": 3.2715, "step": 1387 }, { "epoch": 0.4730743012951602, "grad_norm": 14.692688934693367, "learning_rate": 9.899648780784482e-06, "loss": 3.0429, "step": 1388 }, { "epoch": 0.4734151329243354, "grad_norm": 18.05068175738741, "learning_rate": 9.899253082443138e-06, "loss": 3.0802, "step": 1389 }, { "epoch": 0.47375596455351054, "grad_norm": 12.279897547837207, "learning_rate": 9.89885661342978e-06, "loss": 3.021, "step": 1390 }, { "epoch": 0.47409679618268574, "grad_norm": 16.66204913977151, "learning_rate": 9.898459373806776e-06, "loss": 3.3303, "step": 1391 }, { "epoch": 0.47443762781186094, "grad_norm": 14.044315460655591, "learning_rate": 9.898061363636614e-06, "loss": 2.9206, "step": 1392 }, { "epoch": 0.47477845944103614, "grad_norm": 13.775958690207847, "learning_rate": 9.8976625829819e-06, "loss": 3.0537, "step": 1393 }, { "epoch": 0.47511929107021134, "grad_norm": 11.751090229703895, "learning_rate": 9.897263031905366e-06, "loss": 2.9268, "step": 1394 }, { "epoch": 0.4754601226993865, "grad_norm": 12.284157146171538, "learning_rate": 9.896862710469862e-06, "loss": 3.0942, "step": 1395 }, { "epoch": 0.4758009543285617, "grad_norm": 30.458505678821325, "learning_rate": 9.89646161873836e-06, "loss": 4.0367, "step": 1396 }, { "epoch": 0.4761417859577369, "grad_norm": 23.239595257237596, "learning_rate": 9.896059756773954e-06, "loss": 3.4346, "step": 1397 }, { "epoch": 0.47648261758691207, "grad_norm": 20.03817938709607, "learning_rate": 9.89565712463986e-06, "loss": 3.2087, "step": 1398 }, { "epoch": 0.47682344921608727, "grad_norm": 21.70792339959685, "learning_rate": 9.89525372239941e-06, "loss": 3.0152, "step": 1399 }, { "epoch": 0.47716428084526247, "grad_norm": 19.245943232007658, "learning_rate": 9.89484955011606e-06, "loss": 2.8573, "step": 1400 }, { "epoch": 0.4775051124744376, "grad_norm": 13.327048925542654, "learning_rate": 9.894444607853394e-06, "loss": 2.7419, "step": 1401 }, { "epoch": 0.4778459441036128, "grad_norm": 16.194779038932634, "learning_rate": 9.894038895675106e-06, "loss": 3.0109, "step": 1402 }, { "epoch": 0.478186775732788, "grad_norm": 28.37837825908542, "learning_rate": 9.89363241364502e-06, "loss": 3.7048, "step": 1403 }, { "epoch": 0.4785276073619632, "grad_norm": 14.162264883308696, "learning_rate": 9.893225161827073e-06, "loss": 2.9237, "step": 1404 }, { "epoch": 0.4788684389911384, "grad_norm": 15.687705330633051, "learning_rate": 9.892817140285328e-06, "loss": 3.1812, "step": 1405 }, { "epoch": 0.47920927062031354, "grad_norm": 25.017223446779024, "learning_rate": 9.892408349083972e-06, "loss": 3.1272, "step": 1406 }, { "epoch": 0.47955010224948874, "grad_norm": 10.889274858598391, "learning_rate": 9.891998788287307e-06, "loss": 2.7111, "step": 1407 }, { "epoch": 0.47989093387866394, "grad_norm": 15.705354735066164, "learning_rate": 9.891588457959758e-06, "loss": 3.4863, "step": 1408 }, { "epoch": 0.48023176550783914, "grad_norm": 20.39521788141351, "learning_rate": 9.89117735816587e-06, "loss": 3.6017, "step": 1409 }, { "epoch": 0.48057259713701433, "grad_norm": 28.357659717505072, "learning_rate": 9.890765488970317e-06, "loss": 3.2948, "step": 1410 }, { "epoch": 0.4809134287661895, "grad_norm": 15.352861623318987, "learning_rate": 9.89035285043788e-06, "loss": 3.2627, "step": 1411 }, { "epoch": 0.4812542603953647, "grad_norm": 23.589323729570992, "learning_rate": 9.889939442633475e-06, "loss": 3.1794, "step": 1412 }, { "epoch": 0.4815950920245399, "grad_norm": 17.489197024291006, "learning_rate": 9.88952526562213e-06, "loss": 2.9441, "step": 1413 }, { "epoch": 0.48193592365371507, "grad_norm": 27.005934948484622, "learning_rate": 9.889110319468995e-06, "loss": 3.4281, "step": 1414 }, { "epoch": 0.48227675528289027, "grad_norm": 18.539069799377945, "learning_rate": 9.888694604239345e-06, "loss": 3.3327, "step": 1415 }, { "epoch": 0.48261758691206547, "grad_norm": 24.261681720297897, "learning_rate": 9.888278119998573e-06, "loss": 3.3178, "step": 1416 }, { "epoch": 0.4829584185412406, "grad_norm": 24.319891138396788, "learning_rate": 9.887860866812193e-06, "loss": 4.0272, "step": 1417 }, { "epoch": 0.4832992501704158, "grad_norm": 22.29386927965848, "learning_rate": 9.887442844745843e-06, "loss": 3.8549, "step": 1418 }, { "epoch": 0.483640081799591, "grad_norm": 18.95886003716071, "learning_rate": 9.887024053865276e-06, "loss": 3.5539, "step": 1419 }, { "epoch": 0.4839809134287662, "grad_norm": 23.006481879020416, "learning_rate": 9.886604494236371e-06, "loss": 3.5862, "step": 1420 }, { "epoch": 0.4843217450579414, "grad_norm": 19.959376117192033, "learning_rate": 9.886184165925128e-06, "loss": 3.3702, "step": 1421 }, { "epoch": 0.48466257668711654, "grad_norm": 21.78534514799325, "learning_rate": 9.885763068997664e-06, "loss": 3.4926, "step": 1422 }, { "epoch": 0.48500340831629174, "grad_norm": 22.862353893347308, "learning_rate": 9.88534120352022e-06, "loss": 3.1608, "step": 1423 }, { "epoch": 0.48534423994546694, "grad_norm": 21.208608006438084, "learning_rate": 9.884918569559157e-06, "loss": 3.3871, "step": 1424 }, { "epoch": 0.48568507157464214, "grad_norm": 16.006091098304996, "learning_rate": 9.88449516718096e-06, "loss": 3.2517, "step": 1425 }, { "epoch": 0.48602590320381733, "grad_norm": 18.700120989882965, "learning_rate": 9.884070996452226e-06, "loss": 3.2115, "step": 1426 }, { "epoch": 0.4863667348329925, "grad_norm": 17.811188371987623, "learning_rate": 9.883646057439681e-06, "loss": 3.5495, "step": 1427 }, { "epoch": 0.4867075664621677, "grad_norm": 20.59931611426034, "learning_rate": 9.883220350210173e-06, "loss": 3.3206, "step": 1428 }, { "epoch": 0.48704839809134287, "grad_norm": 12.741973722614254, "learning_rate": 9.882793874830665e-06, "loss": 2.5837, "step": 1429 }, { "epoch": 0.48738922972051807, "grad_norm": 47.36273251105926, "learning_rate": 9.882366631368243e-06, "loss": 3.8457, "step": 1430 }, { "epoch": 0.48773006134969327, "grad_norm": 18.268341945462026, "learning_rate": 9.881938619890114e-06, "loss": 3.4347, "step": 1431 }, { "epoch": 0.48807089297886846, "grad_norm": 12.112871820639358, "learning_rate": 9.881509840463605e-06, "loss": 2.9547, "step": 1432 }, { "epoch": 0.4884117246080436, "grad_norm": 20.771223372065126, "learning_rate": 9.881080293156166e-06, "loss": 3.7989, "step": 1433 }, { "epoch": 0.4887525562372188, "grad_norm": 14.504243169247511, "learning_rate": 9.880649978035368e-06, "loss": 3.1705, "step": 1434 }, { "epoch": 0.489093387866394, "grad_norm": 15.276379574992415, "learning_rate": 9.880218895168901e-06, "loss": 3.3698, "step": 1435 }, { "epoch": 0.4894342194955692, "grad_norm": 17.82870641841164, "learning_rate": 9.879787044624571e-06, "loss": 3.5095, "step": 1436 }, { "epoch": 0.4897750511247444, "grad_norm": 21.299159259589153, "learning_rate": 9.879354426470316e-06, "loss": 4.0162, "step": 1437 }, { "epoch": 0.49011588275391954, "grad_norm": 21.787565423265086, "learning_rate": 9.878921040774186e-06, "loss": 3.6665, "step": 1438 }, { "epoch": 0.49045671438309474, "grad_norm": 27.661342782102746, "learning_rate": 9.878486887604354e-06, "loss": 3.646, "step": 1439 }, { "epoch": 0.49079754601226994, "grad_norm": 14.464310133780947, "learning_rate": 9.878051967029114e-06, "loss": 3.6752, "step": 1440 }, { "epoch": 0.49113837764144513, "grad_norm": 30.97829951958, "learning_rate": 9.877616279116882e-06, "loss": 3.7221, "step": 1441 }, { "epoch": 0.49147920927062033, "grad_norm": 19.343023703250715, "learning_rate": 9.877179823936192e-06, "loss": 3.6507, "step": 1442 }, { "epoch": 0.4918200408997955, "grad_norm": 19.769706148319564, "learning_rate": 9.876742601555702e-06, "loss": 3.6298, "step": 1443 }, { "epoch": 0.49216087252897067, "grad_norm": 20.691737109203775, "learning_rate": 9.876304612044186e-06, "loss": 3.5661, "step": 1444 }, { "epoch": 0.49250170415814587, "grad_norm": 16.84617621272282, "learning_rate": 9.875865855470543e-06, "loss": 3.3524, "step": 1445 }, { "epoch": 0.49284253578732107, "grad_norm": 20.04025309842055, "learning_rate": 9.875426331903792e-06, "loss": 3.3857, "step": 1446 }, { "epoch": 0.49318336741649627, "grad_norm": 19.643142115625395, "learning_rate": 9.874986041413072e-06, "loss": 3.2521, "step": 1447 }, { "epoch": 0.49352419904567146, "grad_norm": 19.431213328412806, "learning_rate": 9.87454498406764e-06, "loss": 3.5349, "step": 1448 }, { "epoch": 0.4938650306748466, "grad_norm": 29.118623230362207, "learning_rate": 9.874103159936879e-06, "loss": 3.9259, "step": 1449 }, { "epoch": 0.4942058623040218, "grad_norm": 14.462070653294717, "learning_rate": 9.873660569090286e-06, "loss": 2.8797, "step": 1450 }, { "epoch": 0.494546693933197, "grad_norm": 19.506563844788015, "learning_rate": 9.873217211597487e-06, "loss": 3.4225, "step": 1451 }, { "epoch": 0.4948875255623722, "grad_norm": 22.058615911209994, "learning_rate": 9.87277308752822e-06, "loss": 3.5253, "step": 1452 }, { "epoch": 0.4952283571915474, "grad_norm": 20.084471800069092, "learning_rate": 9.87232819695235e-06, "loss": 3.6815, "step": 1453 }, { "epoch": 0.49556918882072254, "grad_norm": 21.763220959022675, "learning_rate": 9.871882539939858e-06, "loss": 3.6426, "step": 1454 }, { "epoch": 0.49591002044989774, "grad_norm": 19.387041357241856, "learning_rate": 9.871436116560848e-06, "loss": 3.3174, "step": 1455 }, { "epoch": 0.49625085207907293, "grad_norm": 18.401665987160555, "learning_rate": 9.870988926885547e-06, "loss": 3.4638, "step": 1456 }, { "epoch": 0.49659168370824813, "grad_norm": 20.669051354377007, "learning_rate": 9.870540970984295e-06, "loss": 3.5606, "step": 1457 }, { "epoch": 0.49693251533742333, "grad_norm": 10.786420616393665, "learning_rate": 9.87009224892756e-06, "loss": 2.5839, "step": 1458 }, { "epoch": 0.4972733469665985, "grad_norm": 19.38191499159506, "learning_rate": 9.86964276078593e-06, "loss": 2.9767, "step": 1459 }, { "epoch": 0.49761417859577367, "grad_norm": 24.004253224907508, "learning_rate": 9.869192506630108e-06, "loss": 3.342, "step": 1460 }, { "epoch": 0.49795501022494887, "grad_norm": 16.6465397107678, "learning_rate": 9.86874148653092e-06, "loss": 3.0304, "step": 1461 }, { "epoch": 0.49829584185412407, "grad_norm": 22.146288602806564, "learning_rate": 9.868289700559316e-06, "loss": 3.2289, "step": 1462 }, { "epoch": 0.49863667348329926, "grad_norm": 19.843855545461913, "learning_rate": 9.867837148786362e-06, "loss": 3.3373, "step": 1463 }, { "epoch": 0.49897750511247446, "grad_norm": 30.334947623252884, "learning_rate": 9.867383831283247e-06, "loss": 3.615, "step": 1464 }, { "epoch": 0.4993183367416496, "grad_norm": 14.015059432838122, "learning_rate": 9.86692974812128e-06, "loss": 2.9441, "step": 1465 }, { "epoch": 0.4996591683708248, "grad_norm": 22.641918064432012, "learning_rate": 9.866474899371889e-06, "loss": 3.0232, "step": 1466 }, { "epoch": 0.5, "grad_norm": 20.60022919078122, "learning_rate": 9.866019285106623e-06, "loss": 3.8762, "step": 1467 }, { "epoch": 0.5003408316291752, "grad_norm": 13.503331653647683, "learning_rate": 9.865562905397155e-06, "loss": 2.933, "step": 1468 }, { "epoch": 0.5006816632583504, "grad_norm": 30.069052317624163, "learning_rate": 9.865105760315273e-06, "loss": 3.1253, "step": 1469 }, { "epoch": 0.5010224948875256, "grad_norm": 18.274629341007465, "learning_rate": 9.864647849932885e-06, "loss": 3.7296, "step": 1470 }, { "epoch": 0.5013633265167008, "grad_norm": 18.943607546875764, "learning_rate": 9.864189174322028e-06, "loss": 3.1164, "step": 1471 }, { "epoch": 0.501704158145876, "grad_norm": 37.02505145895467, "learning_rate": 9.86372973355485e-06, "loss": 3.4828, "step": 1472 }, { "epoch": 0.5020449897750511, "grad_norm": 22.501411635369788, "learning_rate": 9.863269527703625e-06, "loss": 3.0979, "step": 1473 }, { "epoch": 0.5023858214042263, "grad_norm": 15.40704590719538, "learning_rate": 9.862808556840742e-06, "loss": 3.2368, "step": 1474 }, { "epoch": 0.5027266530334015, "grad_norm": 22.31105162704072, "learning_rate": 9.862346821038715e-06, "loss": 3.9659, "step": 1475 }, { "epoch": 0.5030674846625767, "grad_norm": 19.821397992069933, "learning_rate": 9.861884320370177e-06, "loss": 3.134, "step": 1476 }, { "epoch": 0.5034083162917519, "grad_norm": 39.097673616084386, "learning_rate": 9.861421054907882e-06, "loss": 3.525, "step": 1477 }, { "epoch": 0.5037491479209271, "grad_norm": 25.01720808251774, "learning_rate": 9.860957024724704e-06, "loss": 3.3522, "step": 1478 }, { "epoch": 0.5040899795501023, "grad_norm": 17.576717191222194, "learning_rate": 9.860492229893634e-06, "loss": 3.6972, "step": 1479 }, { "epoch": 0.5044308111792775, "grad_norm": 12.626978261019117, "learning_rate": 9.86002667048779e-06, "loss": 2.67, "step": 1480 }, { "epoch": 0.5047716428084527, "grad_norm": 23.247648062535642, "learning_rate": 9.859560346580402e-06, "loss": 3.2598, "step": 1481 }, { "epoch": 0.5051124744376279, "grad_norm": 18.456353109544878, "learning_rate": 9.859093258244826e-06, "loss": 3.3604, "step": 1482 }, { "epoch": 0.505453306066803, "grad_norm": 17.37281536993216, "learning_rate": 9.85862540555454e-06, "loss": 3.2297, "step": 1483 }, { "epoch": 0.5057941376959781, "grad_norm": 14.417659205408224, "learning_rate": 9.858156788583135e-06, "loss": 2.8988, "step": 1484 }, { "epoch": 0.5061349693251533, "grad_norm": 16.169807907080862, "learning_rate": 9.857687407404328e-06, "loss": 2.994, "step": 1485 }, { "epoch": 0.5064758009543285, "grad_norm": 12.561483755633231, "learning_rate": 9.857217262091954e-06, "loss": 2.802, "step": 1486 }, { "epoch": 0.5068166325835037, "grad_norm": 18.227128515839404, "learning_rate": 9.85674635271997e-06, "loss": 3.2977, "step": 1487 }, { "epoch": 0.5071574642126789, "grad_norm": 37.93226663069863, "learning_rate": 9.856274679362451e-06, "loss": 3.6507, "step": 1488 }, { "epoch": 0.5074982958418541, "grad_norm": 15.159854356352005, "learning_rate": 9.855802242093594e-06, "loss": 3.0808, "step": 1489 }, { "epoch": 0.5078391274710293, "grad_norm": 22.517404815996226, "learning_rate": 9.855329040987714e-06, "loss": 2.9795, "step": 1490 }, { "epoch": 0.5081799591002045, "grad_norm": 16.760231990468657, "learning_rate": 9.854855076119247e-06, "loss": 3.231, "step": 1491 }, { "epoch": 0.5085207907293797, "grad_norm": 17.90418969245724, "learning_rate": 9.854380347562752e-06, "loss": 3.0522, "step": 1492 }, { "epoch": 0.5088616223585549, "grad_norm": 18.20400947887226, "learning_rate": 9.853904855392903e-06, "loss": 3.7319, "step": 1493 }, { "epoch": 0.50920245398773, "grad_norm": 18.64045895352656, "learning_rate": 9.853428599684498e-06, "loss": 3.5467, "step": 1494 }, { "epoch": 0.5095432856169052, "grad_norm": 19.134270141609278, "learning_rate": 9.852951580512455e-06, "loss": 3.0835, "step": 1495 }, { "epoch": 0.5098841172460804, "grad_norm": 14.70362076084693, "learning_rate": 9.85247379795181e-06, "loss": 2.3288, "step": 1496 }, { "epoch": 0.5102249488752556, "grad_norm": 24.71588228051837, "learning_rate": 9.851995252077718e-06, "loss": 3.1787, "step": 1497 }, { "epoch": 0.5105657805044308, "grad_norm": 16.264941947327777, "learning_rate": 9.85151594296546e-06, "loss": 3.5998, "step": 1498 }, { "epoch": 0.510906612133606, "grad_norm": 21.2281478996706, "learning_rate": 9.85103587069043e-06, "loss": 3.518, "step": 1499 }, { "epoch": 0.5112474437627812, "grad_norm": 14.837927787896014, "learning_rate": 9.850555035328147e-06, "loss": 3.0262, "step": 1500 }, { "epoch": 0.5115882753919564, "grad_norm": 16.933181713093916, "learning_rate": 9.850073436954248e-06, "loss": 3.1886, "step": 1501 }, { "epoch": 0.5119291070211316, "grad_norm": 24.801652949051785, "learning_rate": 9.849591075644491e-06, "loss": 3.6613, "step": 1502 }, { "epoch": 0.5122699386503068, "grad_norm": 16.215628558427305, "learning_rate": 9.849107951474754e-06, "loss": 3.2878, "step": 1503 }, { "epoch": 0.512610770279482, "grad_norm": 21.94114164491225, "learning_rate": 9.84862406452103e-06, "loss": 3.3205, "step": 1504 }, { "epoch": 0.5129516019086571, "grad_norm": 18.429871782390133, "learning_rate": 9.848139414859441e-06, "loss": 3.5262, "step": 1505 }, { "epoch": 0.5132924335378323, "grad_norm": 13.077439367623349, "learning_rate": 9.847654002566223e-06, "loss": 3.2146, "step": 1506 }, { "epoch": 0.5136332651670075, "grad_norm": 18.6750115742223, "learning_rate": 9.847167827717732e-06, "loss": 3.3218, "step": 1507 }, { "epoch": 0.5139740967961827, "grad_norm": 14.960405424219784, "learning_rate": 9.846680890390448e-06, "loss": 3.2697, "step": 1508 }, { "epoch": 0.5143149284253579, "grad_norm": 16.557908136676847, "learning_rate": 9.846193190660963e-06, "loss": 3.6454, "step": 1509 }, { "epoch": 0.5146557600545331, "grad_norm": 39.35260863696515, "learning_rate": 9.845704728606002e-06, "loss": 2.9334, "step": 1510 }, { "epoch": 0.5149965916837083, "grad_norm": 22.924413332295792, "learning_rate": 9.845215504302395e-06, "loss": 2.9324, "step": 1511 }, { "epoch": 0.5153374233128835, "grad_norm": 17.340290195211917, "learning_rate": 9.844725517827103e-06, "loss": 3.2726, "step": 1512 }, { "epoch": 0.5156782549420587, "grad_norm": 29.529971047192003, "learning_rate": 9.844234769257199e-06, "loss": 3.7938, "step": 1513 }, { "epoch": 0.5160190865712339, "grad_norm": 14.147812617472812, "learning_rate": 9.843743258669884e-06, "loss": 3.0679, "step": 1514 }, { "epoch": 0.516359918200409, "grad_norm": 21.02012687391246, "learning_rate": 9.843250986142472e-06, "loss": 3.503, "step": 1515 }, { "epoch": 0.5167007498295841, "grad_norm": 12.28752844832706, "learning_rate": 9.842757951752399e-06, "loss": 2.9297, "step": 1516 }, { "epoch": 0.5170415814587593, "grad_norm": 17.766570490289503, "learning_rate": 9.842264155577224e-06, "loss": 3.4262, "step": 1517 }, { "epoch": 0.5173824130879345, "grad_norm": 25.15880555586446, "learning_rate": 9.84176959769462e-06, "loss": 2.9639, "step": 1518 }, { "epoch": 0.5177232447171097, "grad_norm": 25.657478030853603, "learning_rate": 9.841274278182385e-06, "loss": 3.6734, "step": 1519 }, { "epoch": 0.5180640763462849, "grad_norm": 42.24240323478798, "learning_rate": 9.840778197118432e-06, "loss": 3.4465, "step": 1520 }, { "epoch": 0.5184049079754601, "grad_norm": 18.860358432617478, "learning_rate": 9.8402813545808e-06, "loss": 3.4771, "step": 1521 }, { "epoch": 0.5187457396046353, "grad_norm": 17.424658832712826, "learning_rate": 9.839783750647644e-06, "loss": 3.3126, "step": 1522 }, { "epoch": 0.5190865712338105, "grad_norm": 14.195987245475699, "learning_rate": 9.839285385397236e-06, "loss": 2.7686, "step": 1523 }, { "epoch": 0.5194274028629857, "grad_norm": 31.809597811474323, "learning_rate": 9.838786258907975e-06, "loss": 3.1601, "step": 1524 }, { "epoch": 0.5197682344921609, "grad_norm": 16.758837510060033, "learning_rate": 9.838286371258373e-06, "loss": 3.3856, "step": 1525 }, { "epoch": 0.520109066121336, "grad_norm": 18.154773271648114, "learning_rate": 9.837785722527064e-06, "loss": 3.2396, "step": 1526 }, { "epoch": 0.5204498977505112, "grad_norm": 17.572187491039667, "learning_rate": 9.837284312792804e-06, "loss": 3.6367, "step": 1527 }, { "epoch": 0.5207907293796864, "grad_norm": 13.945193262325507, "learning_rate": 9.836782142134464e-06, "loss": 2.9765, "step": 1528 }, { "epoch": 0.5211315610088616, "grad_norm": 13.001474106279495, "learning_rate": 9.83627921063104e-06, "loss": 3.1411, "step": 1529 }, { "epoch": 0.5214723926380368, "grad_norm": 16.96862665021584, "learning_rate": 9.835775518361641e-06, "loss": 3.025, "step": 1530 }, { "epoch": 0.521813224267212, "grad_norm": 19.074402296883825, "learning_rate": 9.835271065405505e-06, "loss": 3.3979, "step": 1531 }, { "epoch": 0.5221540558963872, "grad_norm": 15.646227508659338, "learning_rate": 9.834765851841982e-06, "loss": 2.9912, "step": 1532 }, { "epoch": 0.5224948875255624, "grad_norm": 18.987463863690124, "learning_rate": 9.834259877750546e-06, "loss": 3.4337, "step": 1533 }, { "epoch": 0.5228357191547376, "grad_norm": 30.158094864341944, "learning_rate": 9.833753143210786e-06, "loss": 3.855, "step": 1534 }, { "epoch": 0.5231765507839128, "grad_norm": 18.092710190090514, "learning_rate": 9.833245648302413e-06, "loss": 3.0971, "step": 1535 }, { "epoch": 0.523517382413088, "grad_norm": 32.45604763067535, "learning_rate": 9.832737393105263e-06, "loss": 3.8787, "step": 1536 }, { "epoch": 0.5238582140422631, "grad_norm": 16.516283628170143, "learning_rate": 9.83222837769928e-06, "loss": 3.3238, "step": 1537 }, { "epoch": 0.5241990456714383, "grad_norm": 25.143736025965485, "learning_rate": 9.83171860216454e-06, "loss": 3.5386, "step": 1538 }, { "epoch": 0.5245398773006135, "grad_norm": 18.05832553524198, "learning_rate": 9.831208066581227e-06, "loss": 3.4275, "step": 1539 }, { "epoch": 0.5248807089297887, "grad_norm": 26.338051961660387, "learning_rate": 9.830696771029655e-06, "loss": 3.3948, "step": 1540 }, { "epoch": 0.5252215405589639, "grad_norm": 17.566181895848874, "learning_rate": 9.83018471559025e-06, "loss": 3.462, "step": 1541 }, { "epoch": 0.5255623721881391, "grad_norm": 21.42940509503575, "learning_rate": 9.829671900343562e-06, "loss": 3.7183, "step": 1542 }, { "epoch": 0.5259032038173143, "grad_norm": 17.171087361218188, "learning_rate": 9.829158325370259e-06, "loss": 3.0027, "step": 1543 }, { "epoch": 0.5262440354464895, "grad_norm": 17.702420294529105, "learning_rate": 9.828643990751125e-06, "loss": 3.3262, "step": 1544 }, { "epoch": 0.5265848670756647, "grad_norm": 17.030466302876533, "learning_rate": 9.828128896567071e-06, "loss": 3.294, "step": 1545 }, { "epoch": 0.5269256987048399, "grad_norm": 18.00971013034927, "learning_rate": 9.827613042899123e-06, "loss": 3.3253, "step": 1546 }, { "epoch": 0.527266530334015, "grad_norm": 24.180215833475874, "learning_rate": 9.827096429828424e-06, "loss": 4.0187, "step": 1547 }, { "epoch": 0.5276073619631901, "grad_norm": 18.102718395705804, "learning_rate": 9.826579057436243e-06, "loss": 3.4088, "step": 1548 }, { "epoch": 0.5279481935923653, "grad_norm": 15.126789210535533, "learning_rate": 9.826060925803962e-06, "loss": 3.0297, "step": 1549 }, { "epoch": 0.5282890252215405, "grad_norm": 22.121026224817925, "learning_rate": 9.825542035013085e-06, "loss": 3.3939, "step": 1550 }, { "epoch": 0.5286298568507157, "grad_norm": 16.5307827554448, "learning_rate": 9.825022385145238e-06, "loss": 3.746, "step": 1551 }, { "epoch": 0.5289706884798909, "grad_norm": 19.56978130074802, "learning_rate": 9.824501976282162e-06, "loss": 3.5714, "step": 1552 }, { "epoch": 0.5293115201090661, "grad_norm": 19.721439059861737, "learning_rate": 9.823980808505718e-06, "loss": 3.8662, "step": 1553 }, { "epoch": 0.5296523517382413, "grad_norm": 15.168238495971588, "learning_rate": 9.82345888189789e-06, "loss": 3.1397, "step": 1554 }, { "epoch": 0.5299931833674165, "grad_norm": 47.281193660631445, "learning_rate": 9.822936196540782e-06, "loss": 3.9795, "step": 1555 }, { "epoch": 0.5303340149965917, "grad_norm": 18.81058424729779, "learning_rate": 9.822412752516609e-06, "loss": 3.5447, "step": 1556 }, { "epoch": 0.5306748466257669, "grad_norm": 16.89956120198298, "learning_rate": 9.821888549907713e-06, "loss": 3.1807, "step": 1557 }, { "epoch": 0.5310156782549421, "grad_norm": 11.907844232291687, "learning_rate": 9.821363588796553e-06, "loss": 3.174, "step": 1558 }, { "epoch": 0.5313565098841172, "grad_norm": 25.06354999859159, "learning_rate": 9.82083786926571e-06, "loss": 3.3657, "step": 1559 }, { "epoch": 0.5316973415132924, "grad_norm": 14.734146377192172, "learning_rate": 9.820311391397877e-06, "loss": 2.7441, "step": 1560 }, { "epoch": 0.5320381731424676, "grad_norm": 18.222051045016897, "learning_rate": 9.819784155275874e-06, "loss": 3.8756, "step": 1561 }, { "epoch": 0.5323790047716428, "grad_norm": 27.797341406212208, "learning_rate": 9.819256160982637e-06, "loss": 4.098, "step": 1562 }, { "epoch": 0.532719836400818, "grad_norm": 15.668848026683467, "learning_rate": 9.818727408601223e-06, "loss": 3.4028, "step": 1563 }, { "epoch": 0.5330606680299932, "grad_norm": 24.36842197197313, "learning_rate": 9.818197898214805e-06, "loss": 3.6023, "step": 1564 }, { "epoch": 0.5334014996591684, "grad_norm": 13.090273545397695, "learning_rate": 9.817667629906677e-06, "loss": 3.5448, "step": 1565 }, { "epoch": 0.5337423312883436, "grad_norm": 16.03664822972654, "learning_rate": 9.817136603760251e-06, "loss": 3.1292, "step": 1566 }, { "epoch": 0.5340831629175188, "grad_norm": 25.23053729849264, "learning_rate": 9.816604819859064e-06, "loss": 3.5512, "step": 1567 }, { "epoch": 0.534423994546694, "grad_norm": 32.18112277262398, "learning_rate": 9.816072278286763e-06, "loss": 3.3344, "step": 1568 }, { "epoch": 0.5347648261758691, "grad_norm": 17.52753073065037, "learning_rate": 9.815538979127122e-06, "loss": 3.5201, "step": 1569 }, { "epoch": 0.5351056578050443, "grad_norm": 23.728957317153238, "learning_rate": 9.81500492246403e-06, "loss": 3.1171, "step": 1570 }, { "epoch": 0.5354464894342195, "grad_norm": 19.303051827633745, "learning_rate": 9.814470108381498e-06, "loss": 3.3116, "step": 1571 }, { "epoch": 0.5357873210633947, "grad_norm": 23.890154273842533, "learning_rate": 9.813934536963649e-06, "loss": 3.2899, "step": 1572 }, { "epoch": 0.5361281526925699, "grad_norm": 20.29406598367854, "learning_rate": 9.813398208294737e-06, "loss": 3.3675, "step": 1573 }, { "epoch": 0.5364689843217451, "grad_norm": 16.29425805986339, "learning_rate": 9.812861122459125e-06, "loss": 3.311, "step": 1574 }, { "epoch": 0.5368098159509203, "grad_norm": 13.901055057801692, "learning_rate": 9.812323279541298e-06, "loss": 3.2979, "step": 1575 }, { "epoch": 0.5371506475800955, "grad_norm": 13.890331908546461, "learning_rate": 9.811784679625865e-06, "loss": 3.4154, "step": 1576 }, { "epoch": 0.5374914792092707, "grad_norm": 22.712261059068037, "learning_rate": 9.811245322797546e-06, "loss": 3.3621, "step": 1577 }, { "epoch": 0.5378323108384458, "grad_norm": 29.640060640746466, "learning_rate": 9.810705209141184e-06, "loss": 3.4965, "step": 1578 }, { "epoch": 0.538173142467621, "grad_norm": 23.373623314191434, "learning_rate": 9.810164338741745e-06, "loss": 3.6503, "step": 1579 }, { "epoch": 0.5385139740967961, "grad_norm": 20.277328983627505, "learning_rate": 9.809622711684306e-06, "loss": 3.6415, "step": 1580 }, { "epoch": 0.5388548057259713, "grad_norm": 16.228906071471375, "learning_rate": 9.809080328054067e-06, "loss": 3.2832, "step": 1581 }, { "epoch": 0.5391956373551465, "grad_norm": 24.747337601596403, "learning_rate": 9.80853718793635e-06, "loss": 3.7074, "step": 1582 }, { "epoch": 0.5395364689843217, "grad_norm": 18.25615337952711, "learning_rate": 9.807993291416594e-06, "loss": 3.0295, "step": 1583 }, { "epoch": 0.5398773006134969, "grad_norm": 15.967954945786703, "learning_rate": 9.80744863858035e-06, "loss": 3.2497, "step": 1584 }, { "epoch": 0.5402181322426721, "grad_norm": 15.864065329577292, "learning_rate": 9.8069032295133e-06, "loss": 3.3424, "step": 1585 }, { "epoch": 0.5405589638718473, "grad_norm": 16.642382938159773, "learning_rate": 9.806357064301234e-06, "loss": 3.2591, "step": 1586 }, { "epoch": 0.5408997955010225, "grad_norm": 18.09584582423569, "learning_rate": 9.805810143030072e-06, "loss": 3.7017, "step": 1587 }, { "epoch": 0.5412406271301977, "grad_norm": 14.616271752845991, "learning_rate": 9.805262465785844e-06, "loss": 3.3582, "step": 1588 }, { "epoch": 0.5415814587593729, "grad_norm": 19.149403414723615, "learning_rate": 9.804714032654698e-06, "loss": 3.3646, "step": 1589 }, { "epoch": 0.5419222903885481, "grad_norm": 20.19994017303491, "learning_rate": 9.804164843722912e-06, "loss": 3.91, "step": 1590 }, { "epoch": 0.5422631220177232, "grad_norm": 15.226810264078326, "learning_rate": 9.80361489907687e-06, "loss": 3.214, "step": 1591 }, { "epoch": 0.5426039536468984, "grad_norm": 15.17206853449658, "learning_rate": 9.803064198803082e-06, "loss": 3.0032, "step": 1592 }, { "epoch": 0.5429447852760736, "grad_norm": 29.046443515653625, "learning_rate": 9.802512742988174e-06, "loss": 3.0798, "step": 1593 }, { "epoch": 0.5432856169052488, "grad_norm": 19.0569842627887, "learning_rate": 9.801960531718898e-06, "loss": 2.999, "step": 1594 }, { "epoch": 0.543626448534424, "grad_norm": 20.15938099234503, "learning_rate": 9.801407565082111e-06, "loss": 3.2121, "step": 1595 }, { "epoch": 0.5439672801635992, "grad_norm": 13.379928051203606, "learning_rate": 9.800853843164803e-06, "loss": 2.9685, "step": 1596 }, { "epoch": 0.5443081117927744, "grad_norm": 18.915001167614477, "learning_rate": 9.800299366054074e-06, "loss": 3.5305, "step": 1597 }, { "epoch": 0.5446489434219496, "grad_norm": 16.927476667317567, "learning_rate": 9.799744133837146e-06, "loss": 3.5487, "step": 1598 }, { "epoch": 0.5449897750511248, "grad_norm": 17.355929917410133, "learning_rate": 9.799188146601357e-06, "loss": 3.5321, "step": 1599 }, { "epoch": 0.5453306066803, "grad_norm": 21.346532564630852, "learning_rate": 9.798631404434171e-06, "loss": 2.7368, "step": 1600 }, { "epoch": 0.5456714383094751, "grad_norm": 19.357865155176892, "learning_rate": 9.79807390742316e-06, "loss": 3.3909, "step": 1601 }, { "epoch": 0.5460122699386503, "grad_norm": 12.361284368511987, "learning_rate": 9.797515655656026e-06, "loss": 3.1964, "step": 1602 }, { "epoch": 0.5463531015678255, "grad_norm": 38.44915539245129, "learning_rate": 9.79695664922058e-06, "loss": 3.3785, "step": 1603 }, { "epoch": 0.5466939331970007, "grad_norm": 19.400168352681344, "learning_rate": 9.796396888204758e-06, "loss": 2.1677, "step": 1604 }, { "epoch": 0.5470347648261759, "grad_norm": 14.555306366973157, "learning_rate": 9.795836372696611e-06, "loss": 3.1132, "step": 1605 }, { "epoch": 0.547375596455351, "grad_norm": 20.565165187085686, "learning_rate": 9.795275102784313e-06, "loss": 3.7085, "step": 1606 }, { "epoch": 0.5477164280845263, "grad_norm": 18.878151539880257, "learning_rate": 9.794713078556153e-06, "loss": 3.5641, "step": 1607 }, { "epoch": 0.5480572597137015, "grad_norm": 21.45197725002221, "learning_rate": 9.794150300100538e-06, "loss": 3.5332, "step": 1608 }, { "epoch": 0.5483980913428766, "grad_norm": 14.858367164159114, "learning_rate": 9.793586767505995e-06, "loss": 3.2428, "step": 1609 }, { "epoch": 0.5487389229720518, "grad_norm": 17.453274361208347, "learning_rate": 9.793022480861174e-06, "loss": 3.053, "step": 1610 }, { "epoch": 0.549079754601227, "grad_norm": 19.211320918476435, "learning_rate": 9.792457440254836e-06, "loss": 3.1129, "step": 1611 }, { "epoch": 0.5494205862304021, "grad_norm": 38.2449969216942, "learning_rate": 9.791891645775865e-06, "loss": 3.5408, "step": 1612 }, { "epoch": 0.5497614178595773, "grad_norm": 20.699184455481394, "learning_rate": 9.791325097513261e-06, "loss": 3.0889, "step": 1613 }, { "epoch": 0.5501022494887525, "grad_norm": 19.530840221111806, "learning_rate": 9.790757795556146e-06, "loss": 3.4241, "step": 1614 }, { "epoch": 0.5504430811179277, "grad_norm": 15.293559535215353, "learning_rate": 9.790189739993761e-06, "loss": 3.6441, "step": 1615 }, { "epoch": 0.5507839127471029, "grad_norm": 18.303314235913128, "learning_rate": 9.78962093091546e-06, "loss": 3.6989, "step": 1616 }, { "epoch": 0.5511247443762781, "grad_norm": 25.819985272536595, "learning_rate": 9.789051368410723e-06, "loss": 3.1663, "step": 1617 }, { "epoch": 0.5514655760054533, "grad_norm": 23.275192276313707, "learning_rate": 9.788481052569139e-06, "loss": 3.0462, "step": 1618 }, { "epoch": 0.5518064076346285, "grad_norm": 16.92218748119021, "learning_rate": 9.787909983480422e-06, "loss": 3.1385, "step": 1619 }, { "epoch": 0.5521472392638037, "grad_norm": 24.323514583929494, "learning_rate": 9.787338161234408e-06, "loss": 3.8555, "step": 1620 }, { "epoch": 0.5524880708929789, "grad_norm": 19.062689736285673, "learning_rate": 9.786765585921043e-06, "loss": 3.0739, "step": 1621 }, { "epoch": 0.5528289025221541, "grad_norm": 27.021687146844428, "learning_rate": 9.786192257630396e-06, "loss": 3.6903, "step": 1622 }, { "epoch": 0.5531697341513292, "grad_norm": 17.20370716966874, "learning_rate": 9.785618176452654e-06, "loss": 3.0834, "step": 1623 }, { "epoch": 0.5535105657805044, "grad_norm": 26.117489100936556, "learning_rate": 9.785043342478122e-06, "loss": 3.9895, "step": 1624 }, { "epoch": 0.5538513974096796, "grad_norm": 23.96701015593615, "learning_rate": 9.784467755797225e-06, "loss": 3.9757, "step": 1625 }, { "epoch": 0.5541922290388548, "grad_norm": 13.47531519438833, "learning_rate": 9.783891416500503e-06, "loss": 3.0538, "step": 1626 }, { "epoch": 0.55453306066803, "grad_norm": 23.184839114031046, "learning_rate": 9.783314324678618e-06, "loss": 3.9616, "step": 1627 }, { "epoch": 0.5548738922972052, "grad_norm": 15.896665085648067, "learning_rate": 9.78273648042235e-06, "loss": 3.0429, "step": 1628 }, { "epoch": 0.5552147239263804, "grad_norm": 16.584292409597342, "learning_rate": 9.782157883822593e-06, "loss": 3.9655, "step": 1629 }, { "epoch": 0.5555555555555556, "grad_norm": 15.414700296677145, "learning_rate": 9.781578534970366e-06, "loss": 3.0915, "step": 1630 }, { "epoch": 0.5558963871847308, "grad_norm": 13.927641787411831, "learning_rate": 9.780998433956798e-06, "loss": 3.2449, "step": 1631 }, { "epoch": 0.556237218813906, "grad_norm": 16.050433572300463, "learning_rate": 9.780417580873147e-06, "loss": 4.1973, "step": 1632 }, { "epoch": 0.5565780504430811, "grad_norm": 20.662314740514987, "learning_rate": 9.779835975810781e-06, "loss": 3.355, "step": 1633 }, { "epoch": 0.5569188820722563, "grad_norm": 17.368809628416585, "learning_rate": 9.779253618861187e-06, "loss": 3.2864, "step": 1634 }, { "epoch": 0.5572597137014315, "grad_norm": 16.489728292250785, "learning_rate": 9.778670510115975e-06, "loss": 3.3656, "step": 1635 }, { "epoch": 0.5576005453306067, "grad_norm": 19.610326881908954, "learning_rate": 9.778086649666866e-06, "loss": 3.6221, "step": 1636 }, { "epoch": 0.5579413769597819, "grad_norm": 17.115994637490854, "learning_rate": 9.777502037605711e-06, "loss": 3.4412, "step": 1637 }, { "epoch": 0.558282208588957, "grad_norm": 20.381716377262936, "learning_rate": 9.776916674024464e-06, "loss": 3.4456, "step": 1638 }, { "epoch": 0.5586230402181322, "grad_norm": 16.555081429676417, "learning_rate": 9.77633055901521e-06, "loss": 3.2284, "step": 1639 }, { "epoch": 0.5589638718473074, "grad_norm": 15.506779095145, "learning_rate": 9.775743692670145e-06, "loss": 2.9177, "step": 1640 }, { "epoch": 0.5593047034764826, "grad_norm": 17.833966397344103, "learning_rate": 9.775156075081585e-06, "loss": 2.8882, "step": 1641 }, { "epoch": 0.5596455351056578, "grad_norm": 25.061388202303085, "learning_rate": 9.774567706341968e-06, "loss": 3.9267, "step": 1642 }, { "epoch": 0.559986366734833, "grad_norm": 14.087858683556812, "learning_rate": 9.773978586543843e-06, "loss": 3.4599, "step": 1643 }, { "epoch": 0.5603271983640081, "grad_norm": 19.296110892844645, "learning_rate": 9.773388715779882e-06, "loss": 3.5348, "step": 1644 }, { "epoch": 0.5606680299931833, "grad_norm": 18.03541357821088, "learning_rate": 9.772798094142874e-06, "loss": 3.6253, "step": 1645 }, { "epoch": 0.5610088616223585, "grad_norm": 17.928299268217412, "learning_rate": 9.772206721725726e-06, "loss": 3.3037, "step": 1646 }, { "epoch": 0.5613496932515337, "grad_norm": 16.528704855402648, "learning_rate": 9.771614598621465e-06, "loss": 3.325, "step": 1647 }, { "epoch": 0.5616905248807089, "grad_norm": 19.788673728801015, "learning_rate": 9.771021724923233e-06, "loss": 3.4014, "step": 1648 }, { "epoch": 0.5620313565098841, "grad_norm": 19.55693390714591, "learning_rate": 9.77042810072429e-06, "loss": 3.1488, "step": 1649 }, { "epoch": 0.5623721881390593, "grad_norm": 23.384242118423504, "learning_rate": 9.769833726118018e-06, "loss": 3.3793, "step": 1650 }, { "epoch": 0.5627130197682345, "grad_norm": 22.117570414815415, "learning_rate": 9.769238601197913e-06, "loss": 3.0375, "step": 1651 }, { "epoch": 0.5630538513974097, "grad_norm": 13.507446397030291, "learning_rate": 9.76864272605759e-06, "loss": 3.101, "step": 1652 }, { "epoch": 0.5633946830265849, "grad_norm": 26.700571920440346, "learning_rate": 9.768046100790783e-06, "loss": 3.3218, "step": 1653 }, { "epoch": 0.5637355146557601, "grad_norm": 19.532855088842652, "learning_rate": 9.767448725491345e-06, "loss": 3.0882, "step": 1654 }, { "epoch": 0.5640763462849352, "grad_norm": 22.099595054955273, "learning_rate": 9.766850600253243e-06, "loss": 3.6108, "step": 1655 }, { "epoch": 0.5644171779141104, "grad_norm": 17.943772869233804, "learning_rate": 9.766251725170567e-06, "loss": 3.2113, "step": 1656 }, { "epoch": 0.5647580095432856, "grad_norm": 16.16315135674204, "learning_rate": 9.765652100337523e-06, "loss": 3.1791, "step": 1657 }, { "epoch": 0.5650988411724608, "grad_norm": 20.867805006684456, "learning_rate": 9.765051725848431e-06, "loss": 3.0327, "step": 1658 }, { "epoch": 0.565439672801636, "grad_norm": 14.641070241892788, "learning_rate": 9.764450601797733e-06, "loss": 2.8684, "step": 1659 }, { "epoch": 0.5657805044308112, "grad_norm": 31.414163938664256, "learning_rate": 9.76384872827999e-06, "loss": 3.7878, "step": 1660 }, { "epoch": 0.5661213360599864, "grad_norm": 32.29708684015511, "learning_rate": 9.763246105389878e-06, "loss": 3.6638, "step": 1661 }, { "epoch": 0.5664621676891616, "grad_norm": 19.243617720153274, "learning_rate": 9.762642733222192e-06, "loss": 3.6802, "step": 1662 }, { "epoch": 0.5668029993183368, "grad_norm": 27.12063490116175, "learning_rate": 9.762038611871847e-06, "loss": 3.0304, "step": 1663 }, { "epoch": 0.567143830947512, "grad_norm": 21.952715801497042, "learning_rate": 9.76143374143387e-06, "loss": 3.8243, "step": 1664 }, { "epoch": 0.5674846625766872, "grad_norm": 15.855859610447718, "learning_rate": 9.760828122003412e-06, "loss": 3.3323, "step": 1665 }, { "epoch": 0.5678254942058623, "grad_norm": 12.418121967709316, "learning_rate": 9.760221753675738e-06, "loss": 2.9369, "step": 1666 }, { "epoch": 0.5681663258350375, "grad_norm": 22.82029166558021, "learning_rate": 9.759614636546233e-06, "loss": 3.5121, "step": 1667 }, { "epoch": 0.5685071574642127, "grad_norm": 19.00558604869277, "learning_rate": 9.759006770710399e-06, "loss": 3.1048, "step": 1668 }, { "epoch": 0.5688479890933879, "grad_norm": 36.26538696488828, "learning_rate": 9.758398156263856e-06, "loss": 3.7706, "step": 1669 }, { "epoch": 0.569188820722563, "grad_norm": 24.521662502237394, "learning_rate": 9.757788793302338e-06, "loss": 2.6241, "step": 1670 }, { "epoch": 0.5695296523517382, "grad_norm": 20.35130945863565, "learning_rate": 9.757178681921706e-06, "loss": 3.8115, "step": 1671 }, { "epoch": 0.5698704839809134, "grad_norm": 32.99574492072187, "learning_rate": 9.75656782221793e-06, "loss": 3.2726, "step": 1672 }, { "epoch": 0.5702113156100886, "grad_norm": 17.88265634427725, "learning_rate": 9.755956214287098e-06, "loss": 3.0049, "step": 1673 }, { "epoch": 0.5705521472392638, "grad_norm": 21.091673452877927, "learning_rate": 9.755343858225426e-06, "loss": 3.6076, "step": 1674 }, { "epoch": 0.570892978868439, "grad_norm": 18.57965269474806, "learning_rate": 9.75473075412923e-06, "loss": 3.5048, "step": 1675 }, { "epoch": 0.5712338104976141, "grad_norm": 17.066930905625384, "learning_rate": 9.754116902094963e-06, "loss": 3.011, "step": 1676 }, { "epoch": 0.5715746421267893, "grad_norm": 17.37129729613841, "learning_rate": 9.75350230221918e-06, "loss": 3.4308, "step": 1677 }, { "epoch": 0.5719154737559645, "grad_norm": 20.868200149225153, "learning_rate": 9.752886954598564e-06, "loss": 3.2967, "step": 1678 }, { "epoch": 0.5722563053851397, "grad_norm": 21.98096232576628, "learning_rate": 9.752270859329908e-06, "loss": 3.4418, "step": 1679 }, { "epoch": 0.5725971370143149, "grad_norm": 32.345992688775816, "learning_rate": 9.75165401651013e-06, "loss": 3.6531, "step": 1680 }, { "epoch": 0.5729379686434901, "grad_norm": 23.80895702210845, "learning_rate": 9.751036426236258e-06, "loss": 3.4389, "step": 1681 }, { "epoch": 0.5732788002726653, "grad_norm": 21.042913556074335, "learning_rate": 9.750418088605445e-06, "loss": 3.5839, "step": 1682 }, { "epoch": 0.5736196319018405, "grad_norm": 20.073834587778606, "learning_rate": 9.749799003714954e-06, "loss": 3.4739, "step": 1683 }, { "epoch": 0.5739604635310157, "grad_norm": 20.11950536380492, "learning_rate": 9.749179171662174e-06, "loss": 2.9713, "step": 1684 }, { "epoch": 0.5743012951601909, "grad_norm": 17.153768407921994, "learning_rate": 9.748558592544604e-06, "loss": 2.7397, "step": 1685 }, { "epoch": 0.5746421267893661, "grad_norm": 15.685230551478982, "learning_rate": 9.747937266459867e-06, "loss": 3.3199, "step": 1686 }, { "epoch": 0.5749829584185412, "grad_norm": 28.347930590695363, "learning_rate": 9.747315193505695e-06, "loss": 3.1641, "step": 1687 }, { "epoch": 0.5753237900477164, "grad_norm": 16.78628184433956, "learning_rate": 9.746692373779945e-06, "loss": 3.3041, "step": 1688 }, { "epoch": 0.5756646216768916, "grad_norm": 15.638622825433908, "learning_rate": 9.74606880738059e-06, "loss": 3.5301, "step": 1689 }, { "epoch": 0.5760054533060668, "grad_norm": 16.660191092037085, "learning_rate": 9.745444494405718e-06, "loss": 3.1102, "step": 1690 }, { "epoch": 0.576346284935242, "grad_norm": 15.145293729868367, "learning_rate": 9.744819434953537e-06, "loss": 3.1792, "step": 1691 }, { "epoch": 0.5766871165644172, "grad_norm": 14.26129323429016, "learning_rate": 9.74419362912237e-06, "loss": 3.2652, "step": 1692 }, { "epoch": 0.5770279481935924, "grad_norm": 15.897537768864254, "learning_rate": 9.74356707701066e-06, "loss": 3.2117, "step": 1693 }, { "epoch": 0.5773687798227676, "grad_norm": 14.695673139894016, "learning_rate": 9.742939778716965e-06, "loss": 3.0266, "step": 1694 }, { "epoch": 0.5777096114519428, "grad_norm": 13.649979372066875, "learning_rate": 9.742311734339964e-06, "loss": 3.133, "step": 1695 }, { "epoch": 0.578050443081118, "grad_norm": 15.658604536453211, "learning_rate": 9.741682943978447e-06, "loss": 3.7167, "step": 1696 }, { "epoch": 0.5783912747102932, "grad_norm": 11.94210990787602, "learning_rate": 9.741053407731329e-06, "loss": 3.2525, "step": 1697 }, { "epoch": 0.5787321063394683, "grad_norm": 16.630800247906436, "learning_rate": 9.740423125697638e-06, "loss": 3.897, "step": 1698 }, { "epoch": 0.5790729379686435, "grad_norm": 21.933370652759564, "learning_rate": 9.739792097976517e-06, "loss": 4.1034, "step": 1699 }, { "epoch": 0.5794137695978187, "grad_norm": 16.283444476659973, "learning_rate": 9.739160324667231e-06, "loss": 3.1147, "step": 1700 }, { "epoch": 0.5797546012269938, "grad_norm": 20.222291723604275, "learning_rate": 9.73852780586916e-06, "loss": 3.6643, "step": 1701 }, { "epoch": 0.580095432856169, "grad_norm": 34.86842990658669, "learning_rate": 9.737894541681804e-06, "loss": 3.4645, "step": 1702 }, { "epoch": 0.5804362644853442, "grad_norm": 10.131899107706852, "learning_rate": 9.737260532204775e-06, "loss": 3.0308, "step": 1703 }, { "epoch": 0.5807770961145194, "grad_norm": 27.301909822686365, "learning_rate": 9.736625777537807e-06, "loss": 3.4509, "step": 1704 }, { "epoch": 0.5811179277436946, "grad_norm": 21.188859726753112, "learning_rate": 9.735990277780747e-06, "loss": 3.6515, "step": 1705 }, { "epoch": 0.5814587593728698, "grad_norm": 20.646469722038926, "learning_rate": 9.735354033033564e-06, "loss": 3.4244, "step": 1706 }, { "epoch": 0.581799591002045, "grad_norm": 19.111392591774955, "learning_rate": 9.734717043396342e-06, "loss": 3.2992, "step": 1707 }, { "epoch": 0.5821404226312201, "grad_norm": 12.818916658618486, "learning_rate": 9.734079308969282e-06, "loss": 2.795, "step": 1708 }, { "epoch": 0.5824812542603953, "grad_norm": 17.863965712204635, "learning_rate": 9.7334408298527e-06, "loss": 3.0267, "step": 1709 }, { "epoch": 0.5828220858895705, "grad_norm": 18.371143372987238, "learning_rate": 9.732801606147033e-06, "loss": 3.3958, "step": 1710 }, { "epoch": 0.5831629175187457, "grad_norm": 25.73759906154846, "learning_rate": 9.732161637952834e-06, "loss": 3.5781, "step": 1711 }, { "epoch": 0.5835037491479209, "grad_norm": 16.656303230555313, "learning_rate": 9.73152092537077e-06, "loss": 3.1206, "step": 1712 }, { "epoch": 0.5838445807770961, "grad_norm": 16.052805562940392, "learning_rate": 9.73087946850163e-06, "loss": 3.0211, "step": 1713 }, { "epoch": 0.5841854124062713, "grad_norm": 19.82719606798507, "learning_rate": 9.730237267446318e-06, "loss": 3.5862, "step": 1714 }, { "epoch": 0.5845262440354465, "grad_norm": 15.362003875352956, "learning_rate": 9.729594322305853e-06, "loss": 3.4804, "step": 1715 }, { "epoch": 0.5848670756646217, "grad_norm": 19.706397312138662, "learning_rate": 9.728950633181375e-06, "loss": 3.9479, "step": 1716 }, { "epoch": 0.5852079072937969, "grad_norm": 29.31642775584269, "learning_rate": 9.728306200174136e-06, "loss": 3.3872, "step": 1717 }, { "epoch": 0.5855487389229721, "grad_norm": 19.672143710946138, "learning_rate": 9.72766102338551e-06, "loss": 3.1713, "step": 1718 }, { "epoch": 0.5858895705521472, "grad_norm": 14.978996288300168, "learning_rate": 9.727015102916986e-06, "loss": 3.314, "step": 1719 }, { "epoch": 0.5862304021813224, "grad_norm": 12.535090190387233, "learning_rate": 9.726368438870169e-06, "loss": 2.9904, "step": 1720 }, { "epoch": 0.5865712338104976, "grad_norm": 29.853944149723763, "learning_rate": 9.72572103134678e-06, "loss": 3.5488, "step": 1721 }, { "epoch": 0.5869120654396728, "grad_norm": 12.568757566148179, "learning_rate": 9.725072880448663e-06, "loss": 3.2826, "step": 1722 }, { "epoch": 0.587252897068848, "grad_norm": 19.895205566469492, "learning_rate": 9.724423986277772e-06, "loss": 3.7687, "step": 1723 }, { "epoch": 0.5875937286980232, "grad_norm": 19.463887104307048, "learning_rate": 9.72377434893618e-06, "loss": 3.1531, "step": 1724 }, { "epoch": 0.5879345603271984, "grad_norm": 16.69781491599921, "learning_rate": 9.72312396852608e-06, "loss": 2.9758, "step": 1725 }, { "epoch": 0.5882753919563736, "grad_norm": 19.317821633422565, "learning_rate": 9.722472845149777e-06, "loss": 3.4119, "step": 1726 }, { "epoch": 0.5886162235855488, "grad_norm": 24.64048825929287, "learning_rate": 9.721820978909698e-06, "loss": 4.0101, "step": 1727 }, { "epoch": 0.588957055214724, "grad_norm": 13.085506404682956, "learning_rate": 9.72116836990838e-06, "loss": 2.7017, "step": 1728 }, { "epoch": 0.5892978868438992, "grad_norm": 15.047291198385711, "learning_rate": 9.720515018248483e-06, "loss": 3.4557, "step": 1729 }, { "epoch": 0.5896387184730743, "grad_norm": 17.695315801435573, "learning_rate": 9.719860924032786e-06, "loss": 3.3825, "step": 1730 }, { "epoch": 0.5899795501022495, "grad_norm": 33.46290133391076, "learning_rate": 9.719206087364174e-06, "loss": 3.4395, "step": 1731 }, { "epoch": 0.5903203817314246, "grad_norm": 18.58268265990649, "learning_rate": 9.718550508345661e-06, "loss": 3.1963, "step": 1732 }, { "epoch": 0.5906612133605998, "grad_norm": 10.030358439421084, "learning_rate": 9.717894187080368e-06, "loss": 3.3816, "step": 1733 }, { "epoch": 0.591002044989775, "grad_norm": 14.628639121194412, "learning_rate": 9.717237123671539e-06, "loss": 3.1266, "step": 1734 }, { "epoch": 0.5913428766189502, "grad_norm": 15.485227376649549, "learning_rate": 9.716579318222532e-06, "loss": 3.4757, "step": 1735 }, { "epoch": 0.5916837082481254, "grad_norm": 23.133060374852793, "learning_rate": 9.715920770836822e-06, "loss": 2.8721, "step": 1736 }, { "epoch": 0.5920245398773006, "grad_norm": 12.010613608909534, "learning_rate": 9.715261481618004e-06, "loss": 3.0459, "step": 1737 }, { "epoch": 0.5923653715064758, "grad_norm": 15.108066919578581, "learning_rate": 9.714601450669781e-06, "loss": 2.9643, "step": 1738 }, { "epoch": 0.592706203135651, "grad_norm": 21.970325386998258, "learning_rate": 9.713940678095985e-06, "loss": 3.2193, "step": 1739 }, { "epoch": 0.5930470347648262, "grad_norm": 22.137009450507044, "learning_rate": 9.713279164000552e-06, "loss": 3.9195, "step": 1740 }, { "epoch": 0.5933878663940013, "grad_norm": 18.48329499315672, "learning_rate": 9.712616908487549e-06, "loss": 3.3258, "step": 1741 }, { "epoch": 0.5937286980231765, "grad_norm": 15.285310420037039, "learning_rate": 9.711953911661142e-06, "loss": 3.0607, "step": 1742 }, { "epoch": 0.5940695296523517, "grad_norm": 22.484314381025495, "learning_rate": 9.711290173625629e-06, "loss": 3.1433, "step": 1743 }, { "epoch": 0.5944103612815269, "grad_norm": 16.560494292257584, "learning_rate": 9.710625694485419e-06, "loss": 3.1543, "step": 1744 }, { "epoch": 0.5947511929107021, "grad_norm": 21.143120713135982, "learning_rate": 9.709960474345033e-06, "loss": 2.9393, "step": 1745 }, { "epoch": 0.5950920245398773, "grad_norm": 27.503473944805915, "learning_rate": 9.709294513309116e-06, "loss": 3.3063, "step": 1746 }, { "epoch": 0.5954328561690525, "grad_norm": 17.087838545492417, "learning_rate": 9.708627811482425e-06, "loss": 3.3532, "step": 1747 }, { "epoch": 0.5957736877982277, "grad_norm": 16.011383297670417, "learning_rate": 9.707960368969835e-06, "loss": 3.3669, "step": 1748 }, { "epoch": 0.5961145194274029, "grad_norm": 15.585304441511918, "learning_rate": 9.707292185876338e-06, "loss": 3.5645, "step": 1749 }, { "epoch": 0.5964553510565781, "grad_norm": 14.90797392911369, "learning_rate": 9.706623262307043e-06, "loss": 3.2119, "step": 1750 }, { "epoch": 0.5967961826857532, "grad_norm": 34.46728319386735, "learning_rate": 9.70595359836717e-06, "loss": 3.8667, "step": 1751 }, { "epoch": 0.5971370143149284, "grad_norm": 17.926165341434796, "learning_rate": 9.705283194162064e-06, "loss": 2.8577, "step": 1752 }, { "epoch": 0.5974778459441036, "grad_norm": 17.72102081500821, "learning_rate": 9.704612049797182e-06, "loss": 3.2888, "step": 1753 }, { "epoch": 0.5978186775732788, "grad_norm": 19.131007534154147, "learning_rate": 9.703940165378095e-06, "loss": 3.2549, "step": 1754 }, { "epoch": 0.598159509202454, "grad_norm": 15.403960353858936, "learning_rate": 9.703267541010495e-06, "loss": 3.6266, "step": 1755 }, { "epoch": 0.5985003408316292, "grad_norm": 18.141761538154697, "learning_rate": 9.702594176800189e-06, "loss": 3.4513, "step": 1756 }, { "epoch": 0.5988411724608044, "grad_norm": 27.417494811955905, "learning_rate": 9.7019200728531e-06, "loss": 4.0869, "step": 1757 }, { "epoch": 0.5991820040899796, "grad_norm": 15.733982780764096, "learning_rate": 9.701245229275264e-06, "loss": 3.214, "step": 1758 }, { "epoch": 0.5995228357191548, "grad_norm": 40.084443808419486, "learning_rate": 9.70056964617284e-06, "loss": 3.089, "step": 1759 }, { "epoch": 0.59986366734833, "grad_norm": 16.707991751760506, "learning_rate": 9.699893323652098e-06, "loss": 3.6205, "step": 1760 }, { "epoch": 0.6002044989775052, "grad_norm": 17.560822308231135, "learning_rate": 9.699216261819426e-06, "loss": 3.1029, "step": 1761 }, { "epoch": 0.6005453306066802, "grad_norm": 16.31764355915314, "learning_rate": 9.698538460781333e-06, "loss": 3.5345, "step": 1762 }, { "epoch": 0.6008861622358554, "grad_norm": 18.47959433778221, "learning_rate": 9.697859920644432e-06, "loss": 4.0105, "step": 1763 }, { "epoch": 0.6012269938650306, "grad_norm": 19.528025694746837, "learning_rate": 9.697180641515467e-06, "loss": 3.3635, "step": 1764 }, { "epoch": 0.6015678254942058, "grad_norm": 26.649094177420068, "learning_rate": 9.696500623501289e-06, "loss": 4.0405, "step": 1765 }, { "epoch": 0.601908657123381, "grad_norm": 27.399155621177428, "learning_rate": 9.695819866708867e-06, "loss": 4.0702, "step": 1766 }, { "epoch": 0.6022494887525562, "grad_norm": 14.324142637352084, "learning_rate": 9.695138371245286e-06, "loss": 3.1551, "step": 1767 }, { "epoch": 0.6025903203817314, "grad_norm": 18.0018605933936, "learning_rate": 9.69445613721775e-06, "loss": 2.9436, "step": 1768 }, { "epoch": 0.6029311520109066, "grad_norm": 18.371189287082153, "learning_rate": 9.693773164733576e-06, "loss": 3.62, "step": 1769 }, { "epoch": 0.6032719836400818, "grad_norm": 20.075763829815855, "learning_rate": 9.693089453900198e-06, "loss": 3.9432, "step": 1770 }, { "epoch": 0.603612815269257, "grad_norm": 12.512502426447503, "learning_rate": 9.692405004825166e-06, "loss": 3.2894, "step": 1771 }, { "epoch": 0.6039536468984322, "grad_norm": 29.790802309400387, "learning_rate": 9.691719817616148e-06, "loss": 3.9221, "step": 1772 }, { "epoch": 0.6042944785276073, "grad_norm": 11.676445592325315, "learning_rate": 9.691033892380927e-06, "loss": 3.0217, "step": 1773 }, { "epoch": 0.6046353101567825, "grad_norm": 19.58354859574178, "learning_rate": 9.6903472292274e-06, "loss": 3.1649, "step": 1774 }, { "epoch": 0.6049761417859577, "grad_norm": 19.214764802630345, "learning_rate": 9.689659828263582e-06, "loss": 3.6177, "step": 1775 }, { "epoch": 0.6053169734151329, "grad_norm": 33.215468784956435, "learning_rate": 9.688971689597605e-06, "loss": 2.9406, "step": 1776 }, { "epoch": 0.6056578050443081, "grad_norm": 34.4364586749498, "learning_rate": 9.688282813337717e-06, "loss": 3.0985, "step": 1777 }, { "epoch": 0.6059986366734833, "grad_norm": 14.018101748450775, "learning_rate": 9.687593199592278e-06, "loss": 3.2192, "step": 1778 }, { "epoch": 0.6063394683026585, "grad_norm": 18.332921175345987, "learning_rate": 9.686902848469768e-06, "loss": 3.1068, "step": 1779 }, { "epoch": 0.6066802999318337, "grad_norm": 15.899824386929613, "learning_rate": 9.686211760078783e-06, "loss": 2.9783, "step": 1780 }, { "epoch": 0.6070211315610089, "grad_norm": 14.28342884102595, "learning_rate": 9.685519934528033e-06, "loss": 3.387, "step": 1781 }, { "epoch": 0.6073619631901841, "grad_norm": 12.998057687259088, "learning_rate": 9.684827371926346e-06, "loss": 3.1405, "step": 1782 }, { "epoch": 0.6077027948193592, "grad_norm": 14.952210105424163, "learning_rate": 9.684134072382664e-06, "loss": 3.1433, "step": 1783 }, { "epoch": 0.6080436264485344, "grad_norm": 18.73245511395826, "learning_rate": 9.683440036006046e-06, "loss": 3.1966, "step": 1784 }, { "epoch": 0.6083844580777096, "grad_norm": 23.69203177551261, "learning_rate": 9.682745262905667e-06, "loss": 4.037, "step": 1785 }, { "epoch": 0.6087252897068848, "grad_norm": 56.65339131637852, "learning_rate": 9.682049753190816e-06, "loss": 3.2339, "step": 1786 }, { "epoch": 0.60906612133606, "grad_norm": 19.33934175243023, "learning_rate": 9.681353506970901e-06, "loss": 3.2614, "step": 1787 }, { "epoch": 0.6094069529652352, "grad_norm": 23.18493226215609, "learning_rate": 9.680656524355444e-06, "loss": 2.9967, "step": 1788 }, { "epoch": 0.6097477845944104, "grad_norm": 34.775263290492724, "learning_rate": 9.679958805454084e-06, "loss": 3.2045, "step": 1789 }, { "epoch": 0.6100886162235856, "grad_norm": 16.414338872007665, "learning_rate": 9.679260350376574e-06, "loss": 3.4499, "step": 1790 }, { "epoch": 0.6104294478527608, "grad_norm": 17.019092895899327, "learning_rate": 9.678561159232784e-06, "loss": 4.0108, "step": 1791 }, { "epoch": 0.610770279481936, "grad_norm": 13.156088882837388, "learning_rate": 9.677861232132699e-06, "loss": 2.7811, "step": 1792 }, { "epoch": 0.6111111111111112, "grad_norm": 16.883566441252388, "learning_rate": 9.67716056918642e-06, "loss": 3.5357, "step": 1793 }, { "epoch": 0.6114519427402862, "grad_norm": 17.174598067804375, "learning_rate": 9.676459170504165e-06, "loss": 3.7759, "step": 1794 }, { "epoch": 0.6117927743694614, "grad_norm": 13.273156481569139, "learning_rate": 9.675757036196268e-06, "loss": 3.397, "step": 1795 }, { "epoch": 0.6121336059986366, "grad_norm": 13.898923199502358, "learning_rate": 9.675054166373174e-06, "loss": 3.2752, "step": 1796 }, { "epoch": 0.6124744376278118, "grad_norm": 18.641911625176398, "learning_rate": 9.67435056114545e-06, "loss": 3.0756, "step": 1797 }, { "epoch": 0.612815269256987, "grad_norm": 31.34538033378389, "learning_rate": 9.673646220623776e-06, "loss": 3.3097, "step": 1798 }, { "epoch": 0.6131561008861622, "grad_norm": 20.537599329393217, "learning_rate": 9.672941144918946e-06, "loss": 3.3971, "step": 1799 }, { "epoch": 0.6134969325153374, "grad_norm": 15.152559174975213, "learning_rate": 9.672235334141873e-06, "loss": 3.3698, "step": 1800 }, { "epoch": 0.6138377641445126, "grad_norm": 14.108301168258606, "learning_rate": 9.671528788403582e-06, "loss": 2.8024, "step": 1801 }, { "epoch": 0.6141785957736878, "grad_norm": 14.314416365442304, "learning_rate": 9.670821507815215e-06, "loss": 2.8505, "step": 1802 }, { "epoch": 0.614519427402863, "grad_norm": 13.746173824030882, "learning_rate": 9.670113492488033e-06, "loss": 2.7899, "step": 1803 }, { "epoch": 0.6148602590320382, "grad_norm": 19.33679944487343, "learning_rate": 9.669404742533409e-06, "loss": 3.1968, "step": 1804 }, { "epoch": 0.6152010906612133, "grad_norm": 17.55018859703818, "learning_rate": 9.668695258062828e-06, "loss": 3.4387, "step": 1805 }, { "epoch": 0.6155419222903885, "grad_norm": 16.886446110418067, "learning_rate": 9.667985039187902e-06, "loss": 2.6743, "step": 1806 }, { "epoch": 0.6158827539195637, "grad_norm": 27.700302233006546, "learning_rate": 9.667274086020343e-06, "loss": 3.5538, "step": 1807 }, { "epoch": 0.6162235855487389, "grad_norm": 30.244565436605427, "learning_rate": 9.666562398671995e-06, "loss": 3.2588, "step": 1808 }, { "epoch": 0.6165644171779141, "grad_norm": 20.26335838019999, "learning_rate": 9.665849977254802e-06, "loss": 3.6427, "step": 1809 }, { "epoch": 0.6169052488070893, "grad_norm": 27.77334602516442, "learning_rate": 9.665136821880834e-06, "loss": 3.2375, "step": 1810 }, { "epoch": 0.6172460804362645, "grad_norm": 16.44531524760947, "learning_rate": 9.664422932662275e-06, "loss": 2.9253, "step": 1811 }, { "epoch": 0.6175869120654397, "grad_norm": 21.80302981711445, "learning_rate": 9.66370830971142e-06, "loss": 3.4555, "step": 1812 }, { "epoch": 0.6179277436946149, "grad_norm": 15.616377422993686, "learning_rate": 9.662992953140682e-06, "loss": 3.3133, "step": 1813 }, { "epoch": 0.6182685753237901, "grad_norm": 20.962309783746115, "learning_rate": 9.662276863062592e-06, "loss": 2.797, "step": 1814 }, { "epoch": 0.6186094069529653, "grad_norm": 15.978376943741383, "learning_rate": 9.66156003958979e-06, "loss": 3.4829, "step": 1815 }, { "epoch": 0.6189502385821404, "grad_norm": 16.717870616965534, "learning_rate": 9.660842482835039e-06, "loss": 3.1759, "step": 1816 }, { "epoch": 0.6192910702113156, "grad_norm": 17.18510611054113, "learning_rate": 9.660124192911208e-06, "loss": 3.5615, "step": 1817 }, { "epoch": 0.6196319018404908, "grad_norm": 20.349384926759715, "learning_rate": 9.659405169931296e-06, "loss": 3.9533, "step": 1818 }, { "epoch": 0.619972733469666, "grad_norm": 19.748165624345436, "learning_rate": 9.658685414008397e-06, "loss": 3.469, "step": 1819 }, { "epoch": 0.6203135650988412, "grad_norm": 15.27514433153184, "learning_rate": 9.65796492525574e-06, "loss": 3.3842, "step": 1820 }, { "epoch": 0.6206543967280164, "grad_norm": 12.577268722615022, "learning_rate": 9.65724370378666e-06, "loss": 3.2128, "step": 1821 }, { "epoch": 0.6209952283571916, "grad_norm": 16.347382691662606, "learning_rate": 9.656521749714603e-06, "loss": 3.3777, "step": 1822 }, { "epoch": 0.6213360599863668, "grad_norm": 17.84220432191084, "learning_rate": 9.655799063153139e-06, "loss": 3.0493, "step": 1823 }, { "epoch": 0.621676891615542, "grad_norm": 30.497852188661568, "learning_rate": 9.65507564421595e-06, "loss": 3.4249, "step": 1824 }, { "epoch": 0.6220177232447172, "grad_norm": 16.67352836233098, "learning_rate": 9.654351493016831e-06, "loss": 3.2943, "step": 1825 }, { "epoch": 0.6223585548738922, "grad_norm": 14.207902036529346, "learning_rate": 9.653626609669694e-06, "loss": 3.4879, "step": 1826 }, { "epoch": 0.6226993865030674, "grad_norm": 25.47878905480449, "learning_rate": 9.652900994288565e-06, "loss": 3.5075, "step": 1827 }, { "epoch": 0.6230402181322426, "grad_norm": 22.910564255233158, "learning_rate": 9.652174646987588e-06, "loss": 3.2844, "step": 1828 }, { "epoch": 0.6233810497614178, "grad_norm": 12.849090796188992, "learning_rate": 9.651447567881021e-06, "loss": 3.1157, "step": 1829 }, { "epoch": 0.623721881390593, "grad_norm": 12.735103601478624, "learning_rate": 9.650719757083233e-06, "loss": 3.1012, "step": 1830 }, { "epoch": 0.6240627130197682, "grad_norm": 15.699814846927211, "learning_rate": 9.649991214708716e-06, "loss": 2.9129, "step": 1831 }, { "epoch": 0.6244035446489434, "grad_norm": 36.060754340209265, "learning_rate": 9.64926194087207e-06, "loss": 2.7859, "step": 1832 }, { "epoch": 0.6247443762781186, "grad_norm": 16.53714020497567, "learning_rate": 9.648531935688012e-06, "loss": 3.119, "step": 1833 }, { "epoch": 0.6250852079072938, "grad_norm": 39.18307371935072, "learning_rate": 9.647801199271376e-06, "loss": 2.9805, "step": 1834 }, { "epoch": 0.625426039536469, "grad_norm": 16.97595558283937, "learning_rate": 9.647069731737111e-06, "loss": 3.8182, "step": 1835 }, { "epoch": 0.6257668711656442, "grad_norm": 9.860569606805274, "learning_rate": 9.646337533200277e-06, "loss": 2.5504, "step": 1836 }, { "epoch": 0.6261077027948193, "grad_norm": 15.685724181421113, "learning_rate": 9.645604603776053e-06, "loss": 3.2027, "step": 1837 }, { "epoch": 0.6264485344239945, "grad_norm": 17.138342701064246, "learning_rate": 9.644870943579732e-06, "loss": 3.3941, "step": 1838 }, { "epoch": 0.6267893660531697, "grad_norm": 24.120690620969757, "learning_rate": 9.644136552726721e-06, "loss": 3.4363, "step": 1839 }, { "epoch": 0.6271301976823449, "grad_norm": 12.887324635803548, "learning_rate": 9.643401431332542e-06, "loss": 3.5322, "step": 1840 }, { "epoch": 0.6274710293115201, "grad_norm": 21.07281613749816, "learning_rate": 9.642665579512835e-06, "loss": 3.3731, "step": 1841 }, { "epoch": 0.6278118609406953, "grad_norm": 26.417153229617224, "learning_rate": 9.641928997383352e-06, "loss": 3.7541, "step": 1842 }, { "epoch": 0.6281526925698705, "grad_norm": 23.53196803364887, "learning_rate": 9.641191685059958e-06, "loss": 2.9649, "step": 1843 }, { "epoch": 0.6284935241990457, "grad_norm": 31.738404525989697, "learning_rate": 9.640453642658637e-06, "loss": 3.3387, "step": 1844 }, { "epoch": 0.6288343558282209, "grad_norm": 22.51145971746865, "learning_rate": 9.639714870295484e-06, "loss": 3.61, "step": 1845 }, { "epoch": 0.6291751874573961, "grad_norm": 15.879174328582993, "learning_rate": 9.638975368086714e-06, "loss": 3.0667, "step": 1846 }, { "epoch": 0.6295160190865713, "grad_norm": 20.43142398231541, "learning_rate": 9.638235136148652e-06, "loss": 3.6289, "step": 1847 }, { "epoch": 0.6298568507157464, "grad_norm": 19.285960679220977, "learning_rate": 9.637494174597738e-06, "loss": 3.3616, "step": 1848 }, { "epoch": 0.6301976823449216, "grad_norm": 18.051598506379808, "learning_rate": 9.63675248355053e-06, "loss": 3.3051, "step": 1849 }, { "epoch": 0.6305385139740968, "grad_norm": 15.676887315075097, "learning_rate": 9.636010063123698e-06, "loss": 2.9841, "step": 1850 }, { "epoch": 0.630879345603272, "grad_norm": 19.804638979770427, "learning_rate": 9.63526691343403e-06, "loss": 3.0773, "step": 1851 }, { "epoch": 0.6312201772324472, "grad_norm": 13.653545484388776, "learning_rate": 9.634523034598421e-06, "loss": 3.5447, "step": 1852 }, { "epoch": 0.6315610088616224, "grad_norm": 19.035564323226595, "learning_rate": 9.633778426733893e-06, "loss": 3.4079, "step": 1853 }, { "epoch": 0.6319018404907976, "grad_norm": 25.59027052932139, "learning_rate": 9.633033089957572e-06, "loss": 3.3798, "step": 1854 }, { "epoch": 0.6322426721199728, "grad_norm": 18.623654800458535, "learning_rate": 9.632287024386702e-06, "loss": 3.6531, "step": 1855 }, { "epoch": 0.632583503749148, "grad_norm": 17.91486067353337, "learning_rate": 9.631540230138644e-06, "loss": 3.3088, "step": 1856 }, { "epoch": 0.6329243353783232, "grad_norm": 22.436591647253632, "learning_rate": 9.630792707330869e-06, "loss": 3.322, "step": 1857 }, { "epoch": 0.6332651670074982, "grad_norm": 32.648809351022855, "learning_rate": 9.630044456080968e-06, "loss": 3.4247, "step": 1858 }, { "epoch": 0.6336059986366734, "grad_norm": 15.771281612998978, "learning_rate": 9.629295476506644e-06, "loss": 3.2927, "step": 1859 }, { "epoch": 0.6339468302658486, "grad_norm": 53.36057380969888, "learning_rate": 9.628545768725713e-06, "loss": 3.3199, "step": 1860 }, { "epoch": 0.6342876618950238, "grad_norm": 18.92978631518339, "learning_rate": 9.627795332856107e-06, "loss": 2.8132, "step": 1861 }, { "epoch": 0.634628493524199, "grad_norm": 15.97104818504492, "learning_rate": 9.627044169015872e-06, "loss": 3.1322, "step": 1862 }, { "epoch": 0.6349693251533742, "grad_norm": 22.451825560784897, "learning_rate": 9.62629227732317e-06, "loss": 3.9186, "step": 1863 }, { "epoch": 0.6353101567825494, "grad_norm": 18.77877373387588, "learning_rate": 9.625539657896278e-06, "loss": 3.3421, "step": 1864 }, { "epoch": 0.6356509884117246, "grad_norm": 12.298264447879522, "learning_rate": 9.624786310853585e-06, "loss": 2.9925, "step": 1865 }, { "epoch": 0.6359918200408998, "grad_norm": 12.011692444925417, "learning_rate": 9.624032236313594e-06, "loss": 3.3325, "step": 1866 }, { "epoch": 0.636332651670075, "grad_norm": 18.262910516244414, "learning_rate": 9.623277434394925e-06, "loss": 3.0256, "step": 1867 }, { "epoch": 0.6366734832992502, "grad_norm": 19.748418919555885, "learning_rate": 9.622521905216312e-06, "loss": 3.6342, "step": 1868 }, { "epoch": 0.6370143149284253, "grad_norm": 15.754092164611281, "learning_rate": 9.6217656488966e-06, "loss": 3.3024, "step": 1869 }, { "epoch": 0.6373551465576005, "grad_norm": 15.970137893647482, "learning_rate": 9.621008665554756e-06, "loss": 3.3433, "step": 1870 }, { "epoch": 0.6376959781867757, "grad_norm": 18.526916135036977, "learning_rate": 9.620250955309852e-06, "loss": 3.4288, "step": 1871 }, { "epoch": 0.6380368098159509, "grad_norm": 14.551428276763954, "learning_rate": 9.619492518281081e-06, "loss": 3.3101, "step": 1872 }, { "epoch": 0.6383776414451261, "grad_norm": 15.74653833954483, "learning_rate": 9.618733354587748e-06, "loss": 3.4635, "step": 1873 }, { "epoch": 0.6387184730743013, "grad_norm": 14.75290322407769, "learning_rate": 9.617973464349272e-06, "loss": 2.9136, "step": 1874 }, { "epoch": 0.6390593047034765, "grad_norm": 21.96957910863123, "learning_rate": 9.617212847685188e-06, "loss": 3.3328, "step": 1875 }, { "epoch": 0.6394001363326517, "grad_norm": 18.264924863733192, "learning_rate": 9.61645150471514e-06, "loss": 3.3395, "step": 1876 }, { "epoch": 0.6397409679618269, "grad_norm": 15.346664389066275, "learning_rate": 9.615689435558897e-06, "loss": 3.4789, "step": 1877 }, { "epoch": 0.6400817995910021, "grad_norm": 23.667003584791548, "learning_rate": 9.61492664033633e-06, "loss": 3.5531, "step": 1878 }, { "epoch": 0.6404226312201773, "grad_norm": 47.26762825356943, "learning_rate": 9.614163119167431e-06, "loss": 3.9377, "step": 1879 }, { "epoch": 0.6407634628493524, "grad_norm": 21.21045084280319, "learning_rate": 9.613398872172305e-06, "loss": 3.2274, "step": 1880 }, { "epoch": 0.6411042944785276, "grad_norm": 23.88903123114681, "learning_rate": 9.612633899471173e-06, "loss": 3.1888, "step": 1881 }, { "epoch": 0.6414451261077028, "grad_norm": 15.04820980973465, "learning_rate": 9.611868201184366e-06, "loss": 3.2992, "step": 1882 }, { "epoch": 0.641785957736878, "grad_norm": 18.458041362633583, "learning_rate": 9.611101777432333e-06, "loss": 2.9019, "step": 1883 }, { "epoch": 0.6421267893660532, "grad_norm": 16.93714283146721, "learning_rate": 9.610334628335635e-06, "loss": 3.7668, "step": 1884 }, { "epoch": 0.6424676209952284, "grad_norm": 18.5555549969013, "learning_rate": 9.609566754014946e-06, "loss": 3.0663, "step": 1885 }, { "epoch": 0.6428084526244036, "grad_norm": 22.477578878578978, "learning_rate": 9.608798154591058e-06, "loss": 4.1019, "step": 1886 }, { "epoch": 0.6431492842535788, "grad_norm": 25.17848803550175, "learning_rate": 9.608028830184872e-06, "loss": 3.1324, "step": 1887 }, { "epoch": 0.643490115882754, "grad_norm": 17.313547263013028, "learning_rate": 9.60725878091741e-06, "loss": 3.1112, "step": 1888 }, { "epoch": 0.6438309475119292, "grad_norm": 16.3536722105224, "learning_rate": 9.606488006909799e-06, "loss": 2.8149, "step": 1889 }, { "epoch": 0.6441717791411042, "grad_norm": 19.700609418781053, "learning_rate": 9.60571650828329e-06, "loss": 3.8028, "step": 1890 }, { "epoch": 0.6445126107702794, "grad_norm": 17.783987025409406, "learning_rate": 9.604944285159236e-06, "loss": 3.4296, "step": 1891 }, { "epoch": 0.6448534423994546, "grad_norm": 22.636514256158335, "learning_rate": 9.604171337659118e-06, "loss": 3.2642, "step": 1892 }, { "epoch": 0.6451942740286298, "grad_norm": 14.140884043703355, "learning_rate": 9.60339766590452e-06, "loss": 2.9438, "step": 1893 }, { "epoch": 0.645535105657805, "grad_norm": 17.11698372260806, "learning_rate": 9.602623270017142e-06, "loss": 3.3118, "step": 1894 }, { "epoch": 0.6458759372869802, "grad_norm": 18.73075649203496, "learning_rate": 9.601848150118804e-06, "loss": 3.504, "step": 1895 }, { "epoch": 0.6462167689161554, "grad_norm": 17.500809016637444, "learning_rate": 9.601072306331431e-06, "loss": 3.6992, "step": 1896 }, { "epoch": 0.6465576005453306, "grad_norm": 11.73472881915135, "learning_rate": 9.60029573877707e-06, "loss": 2.6807, "step": 1897 }, { "epoch": 0.6468984321745058, "grad_norm": 16.040480551356406, "learning_rate": 9.599518447577875e-06, "loss": 3.3502, "step": 1898 }, { "epoch": 0.647239263803681, "grad_norm": 17.70617568612734, "learning_rate": 9.598740432856121e-06, "loss": 3.2048, "step": 1899 }, { "epoch": 0.6475800954328562, "grad_norm": 18.35507808954349, "learning_rate": 9.597961694734187e-06, "loss": 2.7568, "step": 1900 }, { "epoch": 0.6479209270620313, "grad_norm": 13.270561119274776, "learning_rate": 9.59718223333458e-06, "loss": 3.4024, "step": 1901 }, { "epoch": 0.6482617586912065, "grad_norm": 21.335269259904248, "learning_rate": 9.596402048779903e-06, "loss": 3.4578, "step": 1902 }, { "epoch": 0.6486025903203817, "grad_norm": 23.36209989585706, "learning_rate": 9.595621141192888e-06, "loss": 3.5526, "step": 1903 }, { "epoch": 0.6489434219495569, "grad_norm": 18.670957652435145, "learning_rate": 9.594839510696372e-06, "loss": 3.6742, "step": 1904 }, { "epoch": 0.6492842535787321, "grad_norm": 19.5686217959044, "learning_rate": 9.594057157413312e-06, "loss": 3.2888, "step": 1905 }, { "epoch": 0.6496250852079073, "grad_norm": 15.00830071391283, "learning_rate": 9.593274081466772e-06, "loss": 3.2568, "step": 1906 }, { "epoch": 0.6499659168370825, "grad_norm": 16.54793292260827, "learning_rate": 9.592490282979936e-06, "loss": 3.366, "step": 1907 }, { "epoch": 0.6503067484662577, "grad_norm": 17.764767429075814, "learning_rate": 9.591705762076095e-06, "loss": 3.6577, "step": 1908 }, { "epoch": 0.6506475800954329, "grad_norm": 13.529107916612718, "learning_rate": 9.59092051887866e-06, "loss": 3.2463, "step": 1909 }, { "epoch": 0.6509884117246081, "grad_norm": 16.26038191996233, "learning_rate": 9.590134553511152e-06, "loss": 3.309, "step": 1910 }, { "epoch": 0.6513292433537833, "grad_norm": 18.73827926920426, "learning_rate": 9.589347866097204e-06, "loss": 3.3594, "step": 1911 }, { "epoch": 0.6516700749829584, "grad_norm": 26.976021599516173, "learning_rate": 9.588560456760571e-06, "loss": 3.4447, "step": 1912 }, { "epoch": 0.6520109066121336, "grad_norm": 13.151316619228554, "learning_rate": 9.587772325625111e-06, "loss": 3.1514, "step": 1913 }, { "epoch": 0.6523517382413088, "grad_norm": 18.87965847820063, "learning_rate": 9.5869834728148e-06, "loss": 3.1424, "step": 1914 }, { "epoch": 0.652692569870484, "grad_norm": 11.904140114532062, "learning_rate": 9.586193898453731e-06, "loss": 2.9558, "step": 1915 }, { "epoch": 0.6530334014996592, "grad_norm": 14.223770608299395, "learning_rate": 9.585403602666105e-06, "loss": 3.0521, "step": 1916 }, { "epoch": 0.6533742331288344, "grad_norm": 18.08975802862374, "learning_rate": 9.584612585576239e-06, "loss": 3.0353, "step": 1917 }, { "epoch": 0.6537150647580096, "grad_norm": 21.310963754251947, "learning_rate": 9.583820847308561e-06, "loss": 3.9582, "step": 1918 }, { "epoch": 0.6540558963871848, "grad_norm": 16.260009858218904, "learning_rate": 9.583028387987619e-06, "loss": 3.3373, "step": 1919 }, { "epoch": 0.65439672801636, "grad_norm": 16.61412906976423, "learning_rate": 9.582235207738065e-06, "loss": 3.0913, "step": 1920 }, { "epoch": 0.6547375596455351, "grad_norm": 16.7022067110724, "learning_rate": 9.581441306684674e-06, "loss": 3.5353, "step": 1921 }, { "epoch": 0.6550783912747103, "grad_norm": 12.27511483728523, "learning_rate": 9.580646684952326e-06, "loss": 2.989, "step": 1922 }, { "epoch": 0.6554192229038854, "grad_norm": 43.68046807376579, "learning_rate": 9.579851342666022e-06, "loss": 2.4732, "step": 1923 }, { "epoch": 0.6557600545330606, "grad_norm": 19.9437374392077, "learning_rate": 9.579055279950869e-06, "loss": 3.1407, "step": 1924 }, { "epoch": 0.6561008861622358, "grad_norm": 18.39188423281928, "learning_rate": 9.578258496932093e-06, "loss": 3.9077, "step": 1925 }, { "epoch": 0.656441717791411, "grad_norm": 17.043915977312523, "learning_rate": 9.57746099373503e-06, "loss": 3.9808, "step": 1926 }, { "epoch": 0.6567825494205862, "grad_norm": 15.86645245485466, "learning_rate": 9.576662770485132e-06, "loss": 3.0901, "step": 1927 }, { "epoch": 0.6571233810497614, "grad_norm": 12.033290203276225, "learning_rate": 9.57586382730796e-06, "loss": 2.2891, "step": 1928 }, { "epoch": 0.6574642126789366, "grad_norm": 15.604763197265202, "learning_rate": 9.575064164329193e-06, "loss": 3.1663, "step": 1929 }, { "epoch": 0.6578050443081118, "grad_norm": 17.70071445940186, "learning_rate": 9.574263781674623e-06, "loss": 3.694, "step": 1930 }, { "epoch": 0.658145875937287, "grad_norm": 15.716572900608769, "learning_rate": 9.573462679470147e-06, "loss": 3.1166, "step": 1931 }, { "epoch": 0.6584867075664622, "grad_norm": 15.004924006794022, "learning_rate": 9.572660857841789e-06, "loss": 3.0325, "step": 1932 }, { "epoch": 0.6588275391956373, "grad_norm": 25.699409550847736, "learning_rate": 9.571858316915674e-06, "loss": 2.957, "step": 1933 }, { "epoch": 0.6591683708248125, "grad_norm": 32.67159332154362, "learning_rate": 9.571055056818044e-06, "loss": 3.4626, "step": 1934 }, { "epoch": 0.6595092024539877, "grad_norm": 14.598237438208814, "learning_rate": 9.57025107767526e-06, "loss": 3.1902, "step": 1935 }, { "epoch": 0.6598500340831629, "grad_norm": 16.25388583422308, "learning_rate": 9.569446379613788e-06, "loss": 3.2615, "step": 1936 }, { "epoch": 0.6601908657123381, "grad_norm": 17.66730506762041, "learning_rate": 9.56864096276021e-06, "loss": 3.2894, "step": 1937 }, { "epoch": 0.6605316973415133, "grad_norm": 18.102763026683714, "learning_rate": 9.567834827241223e-06, "loss": 2.8759, "step": 1938 }, { "epoch": 0.6608725289706885, "grad_norm": 16.658696811542644, "learning_rate": 9.567027973183633e-06, "loss": 2.788, "step": 1939 }, { "epoch": 0.6612133605998637, "grad_norm": 16.98936384727511, "learning_rate": 9.566220400714363e-06, "loss": 4.0868, "step": 1940 }, { "epoch": 0.6615541922290389, "grad_norm": 26.3458388830922, "learning_rate": 9.565412109960446e-06, "loss": 3.1901, "step": 1941 }, { "epoch": 0.6618950238582141, "grad_norm": 15.021459857865608, "learning_rate": 9.564603101049033e-06, "loss": 3.2874, "step": 1942 }, { "epoch": 0.6622358554873893, "grad_norm": 12.759852816393614, "learning_rate": 9.563793374107379e-06, "loss": 3.0907, "step": 1943 }, { "epoch": 0.6625766871165644, "grad_norm": 23.79871727152637, "learning_rate": 9.56298292926286e-06, "loss": 2.6903, "step": 1944 }, { "epoch": 0.6629175187457396, "grad_norm": 28.191367496337623, "learning_rate": 9.562171766642963e-06, "loss": 3.3377, "step": 1945 }, { "epoch": 0.6632583503749148, "grad_norm": 13.866116485727343, "learning_rate": 9.561359886375286e-06, "loss": 3.0477, "step": 1946 }, { "epoch": 0.66359918200409, "grad_norm": 19.243519963378194, "learning_rate": 9.56054728858754e-06, "loss": 3.4226, "step": 1947 }, { "epoch": 0.6639400136332652, "grad_norm": 15.929398638255925, "learning_rate": 9.559733973407552e-06, "loss": 2.9338, "step": 1948 }, { "epoch": 0.6642808452624404, "grad_norm": 11.56942208140459, "learning_rate": 9.558919940963258e-06, "loss": 2.8939, "step": 1949 }, { "epoch": 0.6646216768916156, "grad_norm": 19.16066336187289, "learning_rate": 9.55810519138271e-06, "loss": 3.7111, "step": 1950 }, { "epoch": 0.6649625085207908, "grad_norm": 19.049840050713676, "learning_rate": 9.55728972479407e-06, "loss": 2.9145, "step": 1951 }, { "epoch": 0.665303340149966, "grad_norm": 22.239054737509395, "learning_rate": 9.556473541325616e-06, "loss": 3.986, "step": 1952 }, { "epoch": 0.6656441717791411, "grad_norm": 27.066263272024667, "learning_rate": 9.555656641105735e-06, "loss": 3.7447, "step": 1953 }, { "epoch": 0.6659850034083163, "grad_norm": 16.99177305926694, "learning_rate": 9.554839024262929e-06, "loss": 3.4471, "step": 1954 }, { "epoch": 0.6663258350374914, "grad_norm": 25.00570873731563, "learning_rate": 9.554020690925812e-06, "loss": 3.8342, "step": 1955 }, { "epoch": 0.6666666666666666, "grad_norm": 14.789899790011257, "learning_rate": 9.553201641223112e-06, "loss": 3.6569, "step": 1956 }, { "epoch": 0.6670074982958418, "grad_norm": 18.89636273545075, "learning_rate": 9.552381875283671e-06, "loss": 2.9707, "step": 1957 }, { "epoch": 0.667348329925017, "grad_norm": 22.09130500278689, "learning_rate": 9.551561393236437e-06, "loss": 3.4975, "step": 1958 }, { "epoch": 0.6676891615541922, "grad_norm": 18.294943971563203, "learning_rate": 9.550740195210477e-06, "loss": 3.7537, "step": 1959 }, { "epoch": 0.6680299931833674, "grad_norm": 15.164806742011505, "learning_rate": 9.54991828133497e-06, "loss": 3.2224, "step": 1960 }, { "epoch": 0.6683708248125426, "grad_norm": 18.721670109188437, "learning_rate": 9.549095651739204e-06, "loss": 3.9872, "step": 1961 }, { "epoch": 0.6687116564417178, "grad_norm": 17.45966569474267, "learning_rate": 9.548272306552584e-06, "loss": 3.0352, "step": 1962 }, { "epoch": 0.669052488070893, "grad_norm": 17.85052185427996, "learning_rate": 9.547448245904624e-06, "loss": 3.1362, "step": 1963 }, { "epoch": 0.6693933197000682, "grad_norm": 24.286225086708743, "learning_rate": 9.546623469924954e-06, "loss": 3.3246, "step": 1964 }, { "epoch": 0.6697341513292433, "grad_norm": 14.403746426916642, "learning_rate": 9.545797978743312e-06, "loss": 3.3748, "step": 1965 }, { "epoch": 0.6700749829584185, "grad_norm": 19.134102992422886, "learning_rate": 9.54497177248955e-06, "loss": 4.0349, "step": 1966 }, { "epoch": 0.6704158145875937, "grad_norm": 37.96120278096082, "learning_rate": 9.54414485129364e-06, "loss": 3.3466, "step": 1967 }, { "epoch": 0.6707566462167689, "grad_norm": 20.098975124479157, "learning_rate": 9.543317215285652e-06, "loss": 3.0479, "step": 1968 }, { "epoch": 0.6710974778459441, "grad_norm": 18.712315631767304, "learning_rate": 9.542488864595782e-06, "loss": 3.378, "step": 1969 }, { "epoch": 0.6714383094751193, "grad_norm": 15.05536913786104, "learning_rate": 9.54165979935433e-06, "loss": 3.044, "step": 1970 }, { "epoch": 0.6717791411042945, "grad_norm": 31.09397876575775, "learning_rate": 9.540830019691714e-06, "loss": 3.3005, "step": 1971 }, { "epoch": 0.6721199727334697, "grad_norm": 12.36476215528246, "learning_rate": 9.539999525738459e-06, "loss": 2.9023, "step": 1972 }, { "epoch": 0.6724608043626449, "grad_norm": 15.809428659118469, "learning_rate": 9.539168317625205e-06, "loss": 3.0042, "step": 1973 }, { "epoch": 0.6728016359918201, "grad_norm": 19.490097196577153, "learning_rate": 9.538336395482705e-06, "loss": 3.5293, "step": 1974 }, { "epoch": 0.6731424676209953, "grad_norm": 25.19323498374576, "learning_rate": 9.537503759441825e-06, "loss": 3.9339, "step": 1975 }, { "epoch": 0.6734832992501704, "grad_norm": 15.062160769808289, "learning_rate": 9.536670409633542e-06, "loss": 3.1639, "step": 1976 }, { "epoch": 0.6738241308793456, "grad_norm": 19.9092094388048, "learning_rate": 9.535836346188943e-06, "loss": 2.8818, "step": 1977 }, { "epoch": 0.6741649625085208, "grad_norm": 16.669856716141275, "learning_rate": 9.53500156923923e-06, "loss": 3.3019, "step": 1978 }, { "epoch": 0.674505794137696, "grad_norm": 11.682213839884271, "learning_rate": 9.534166078915716e-06, "loss": 2.6365, "step": 1979 }, { "epoch": 0.6748466257668712, "grad_norm": 20.334246464188695, "learning_rate": 9.533329875349831e-06, "loss": 3.2939, "step": 1980 }, { "epoch": 0.6751874573960464, "grad_norm": 15.549475218468288, "learning_rate": 9.53249295867311e-06, "loss": 3.4871, "step": 1981 }, { "epoch": 0.6755282890252216, "grad_norm": 13.64488090397031, "learning_rate": 9.531655329017203e-06, "loss": 3.0567, "step": 1982 }, { "epoch": 0.6758691206543967, "grad_norm": 14.805701269289413, "learning_rate": 9.530816986513874e-06, "loss": 3.067, "step": 1983 }, { "epoch": 0.676209952283572, "grad_norm": 15.277722534191799, "learning_rate": 9.529977931294996e-06, "loss": 3.3365, "step": 1984 }, { "epoch": 0.6765507839127471, "grad_norm": 28.54031385126413, "learning_rate": 9.529138163492557e-06, "loss": 2.9569, "step": 1985 }, { "epoch": 0.6768916155419223, "grad_norm": 22.706198928572856, "learning_rate": 9.528297683238656e-06, "loss": 3.2074, "step": 1986 }, { "epoch": 0.6772324471710974, "grad_norm": 25.845670187432944, "learning_rate": 9.527456490665503e-06, "loss": 3.8666, "step": 1987 }, { "epoch": 0.6775732788002726, "grad_norm": 15.485237987012194, "learning_rate": 9.52661458590542e-06, "loss": 3.2799, "step": 1988 }, { "epoch": 0.6779141104294478, "grad_norm": 15.0153036051137, "learning_rate": 9.525771969090844e-06, "loss": 2.4864, "step": 1989 }, { "epoch": 0.678254942058623, "grad_norm": 24.39427774344475, "learning_rate": 9.524928640354322e-06, "loss": 3.6532, "step": 1990 }, { "epoch": 0.6785957736877982, "grad_norm": 14.881036940036829, "learning_rate": 9.524084599828513e-06, "loss": 3.2092, "step": 1991 }, { "epoch": 0.6789366053169734, "grad_norm": 20.068688339098646, "learning_rate": 9.523239847646186e-06, "loss": 3.7028, "step": 1992 }, { "epoch": 0.6792774369461486, "grad_norm": 26.19946953227523, "learning_rate": 9.522394383940224e-06, "loss": 3.5656, "step": 1993 }, { "epoch": 0.6796182685753238, "grad_norm": 16.342682339367393, "learning_rate": 9.521548208843623e-06, "loss": 3.5323, "step": 1994 }, { "epoch": 0.679959100204499, "grad_norm": 16.464620805398315, "learning_rate": 9.520701322489492e-06, "loss": 3.0097, "step": 1995 }, { "epoch": 0.6802999318336742, "grad_norm": 18.069072951410323, "learning_rate": 9.519853725011043e-06, "loss": 3.7281, "step": 1996 }, { "epoch": 0.6806407634628494, "grad_norm": 11.019877625539294, "learning_rate": 9.519005416541613e-06, "loss": 2.6042, "step": 1997 }, { "epoch": 0.6809815950920245, "grad_norm": 13.574420581578858, "learning_rate": 9.518156397214643e-06, "loss": 2.6014, "step": 1998 }, { "epoch": 0.6813224267211997, "grad_norm": 14.725261245536851, "learning_rate": 9.517306667163684e-06, "loss": 2.9867, "step": 1999 }, { "epoch": 0.6816632583503749, "grad_norm": 19.87163919316535, "learning_rate": 9.516456226522405e-06, "loss": 3.8756, "step": 2000 }, { "epoch": 0.6820040899795501, "grad_norm": 38.296241816809705, "learning_rate": 9.515605075424582e-06, "loss": 3.1252, "step": 2001 }, { "epoch": 0.6823449216087253, "grad_norm": 21.714693392757273, "learning_rate": 9.514753214004106e-06, "loss": 3.7938, "step": 2002 }, { "epoch": 0.6826857532379005, "grad_norm": 22.01524703004069, "learning_rate": 9.513900642394975e-06, "loss": 2.9821, "step": 2003 }, { "epoch": 0.6830265848670757, "grad_norm": 19.897512714745915, "learning_rate": 9.513047360731307e-06, "loss": 3.2912, "step": 2004 }, { "epoch": 0.6833674164962509, "grad_norm": 15.700482501062833, "learning_rate": 9.512193369147322e-06, "loss": 3.551, "step": 2005 }, { "epoch": 0.6837082481254261, "grad_norm": 15.248470032526114, "learning_rate": 9.511338667777357e-06, "loss": 2.8459, "step": 2006 }, { "epoch": 0.6840490797546013, "grad_norm": 17.794658509112953, "learning_rate": 9.510483256755863e-06, "loss": 3.65, "step": 2007 }, { "epoch": 0.6843899113837764, "grad_norm": 14.23043694221653, "learning_rate": 9.509627136217396e-06, "loss": 2.8497, "step": 2008 }, { "epoch": 0.6847307430129516, "grad_norm": 55.315257846910114, "learning_rate": 9.50877030629663e-06, "loss": 3.594, "step": 2009 }, { "epoch": 0.6850715746421268, "grad_norm": 19.991325447621154, "learning_rate": 9.507912767128347e-06, "loss": 3.5262, "step": 2010 }, { "epoch": 0.685412406271302, "grad_norm": 15.684575762939904, "learning_rate": 9.50705451884744e-06, "loss": 3.1289, "step": 2011 }, { "epoch": 0.6857532379004772, "grad_norm": 16.0804914661727, "learning_rate": 9.506195561588914e-06, "loss": 3.4769, "step": 2012 }, { "epoch": 0.6860940695296524, "grad_norm": 17.381004583815827, "learning_rate": 9.50533589548789e-06, "loss": 3.6077, "step": 2013 }, { "epoch": 0.6864349011588275, "grad_norm": 21.176472953167355, "learning_rate": 9.504475520679594e-06, "loss": 3.1482, "step": 2014 }, { "epoch": 0.6867757327880027, "grad_norm": 24.455751375714787, "learning_rate": 9.503614437299368e-06, "loss": 3.044, "step": 2015 }, { "epoch": 0.6871165644171779, "grad_norm": 27.525874253927697, "learning_rate": 9.502752645482662e-06, "loss": 2.938, "step": 2016 }, { "epoch": 0.6874573960463531, "grad_norm": 21.088439896738464, "learning_rate": 9.501890145365042e-06, "loss": 3.218, "step": 2017 }, { "epoch": 0.6877982276755283, "grad_norm": 12.957538250224953, "learning_rate": 9.50102693708218e-06, "loss": 2.9292, "step": 2018 }, { "epoch": 0.6881390593047034, "grad_norm": 12.066691008047316, "learning_rate": 9.500163020769864e-06, "loss": 3.2258, "step": 2019 }, { "epoch": 0.6884798909338786, "grad_norm": 15.473928274866065, "learning_rate": 9.49929839656399e-06, "loss": 3.0083, "step": 2020 }, { "epoch": 0.6888207225630538, "grad_norm": 14.863772285117992, "learning_rate": 9.49843306460057e-06, "loss": 3.1327, "step": 2021 }, { "epoch": 0.689161554192229, "grad_norm": 16.153843672496812, "learning_rate": 9.497567025015719e-06, "loss": 3.0741, "step": 2022 }, { "epoch": 0.6895023858214042, "grad_norm": 37.86599087583578, "learning_rate": 9.496700277945671e-06, "loss": 3.4046, "step": 2023 }, { "epoch": 0.6898432174505794, "grad_norm": 15.813383182354107, "learning_rate": 9.495832823526769e-06, "loss": 3.1282, "step": 2024 }, { "epoch": 0.6901840490797546, "grad_norm": 14.837438996710222, "learning_rate": 9.494964661895468e-06, "loss": 3.6571, "step": 2025 }, { "epoch": 0.6905248807089298, "grad_norm": 16.226959638496016, "learning_rate": 9.494095793188332e-06, "loss": 3.1592, "step": 2026 }, { "epoch": 0.690865712338105, "grad_norm": 21.39002557820156, "learning_rate": 9.493226217542037e-06, "loss": 3.7845, "step": 2027 }, { "epoch": 0.6912065439672802, "grad_norm": 13.73495251411619, "learning_rate": 9.492355935093373e-06, "loss": 3.0983, "step": 2028 }, { "epoch": 0.6915473755964554, "grad_norm": 23.653164891281357, "learning_rate": 9.491484945979237e-06, "loss": 3.507, "step": 2029 }, { "epoch": 0.6918882072256305, "grad_norm": 20.713499765264057, "learning_rate": 9.49061325033664e-06, "loss": 3.5846, "step": 2030 }, { "epoch": 0.6922290388548057, "grad_norm": 14.889074261047142, "learning_rate": 9.489740848302702e-06, "loss": 3.2062, "step": 2031 }, { "epoch": 0.6925698704839809, "grad_norm": 24.970506449710747, "learning_rate": 9.488867740014654e-06, "loss": 2.7953, "step": 2032 }, { "epoch": 0.6929107021131561, "grad_norm": 11.579003408088393, "learning_rate": 9.487993925609845e-06, "loss": 2.9836, "step": 2033 }, { "epoch": 0.6932515337423313, "grad_norm": 14.30339206451075, "learning_rate": 9.487119405225722e-06, "loss": 3.4967, "step": 2034 }, { "epoch": 0.6935923653715065, "grad_norm": 21.164812573692313, "learning_rate": 9.486244178999857e-06, "loss": 2.789, "step": 2035 }, { "epoch": 0.6939331970006817, "grad_norm": 15.530937088809129, "learning_rate": 9.485368247069922e-06, "loss": 3.2896, "step": 2036 }, { "epoch": 0.6942740286298569, "grad_norm": 21.434645487326826, "learning_rate": 9.484491609573709e-06, "loss": 4.0006, "step": 2037 }, { "epoch": 0.6946148602590321, "grad_norm": 27.88718392251804, "learning_rate": 9.483614266649113e-06, "loss": 3.2253, "step": 2038 }, { "epoch": 0.6949556918882073, "grad_norm": 23.76352991016491, "learning_rate": 9.482736218434144e-06, "loss": 3.7524, "step": 2039 }, { "epoch": 0.6952965235173824, "grad_norm": 20.62580595668517, "learning_rate": 9.481857465066923e-06, "loss": 3.8301, "step": 2040 }, { "epoch": 0.6956373551465576, "grad_norm": 15.922671726495734, "learning_rate": 9.480978006685681e-06, "loss": 2.7674, "step": 2041 }, { "epoch": 0.6959781867757328, "grad_norm": 15.586046888614947, "learning_rate": 9.480097843428762e-06, "loss": 3.1495, "step": 2042 }, { "epoch": 0.696319018404908, "grad_norm": 17.998063666559656, "learning_rate": 9.479216975434619e-06, "loss": 2.6914, "step": 2043 }, { "epoch": 0.6966598500340832, "grad_norm": 15.003571196411162, "learning_rate": 9.478335402841813e-06, "loss": 3.193, "step": 2044 }, { "epoch": 0.6970006816632583, "grad_norm": 17.479786551972047, "learning_rate": 9.477453125789023e-06, "loss": 3.4111, "step": 2045 }, { "epoch": 0.6973415132924335, "grad_norm": 14.542936529992526, "learning_rate": 9.476570144415031e-06, "loss": 3.2874, "step": 2046 }, { "epoch": 0.6976823449216087, "grad_norm": 20.954687418567097, "learning_rate": 9.475686458858735e-06, "loss": 3.4455, "step": 2047 }, { "epoch": 0.6980231765507839, "grad_norm": 16.384454023612744, "learning_rate": 9.474802069259143e-06, "loss": 3.7153, "step": 2048 }, { "epoch": 0.6983640081799591, "grad_norm": 15.239032377171956, "learning_rate": 9.47391697575537e-06, "loss": 3.5753, "step": 2049 }, { "epoch": 0.6987048398091343, "grad_norm": 17.779814979591002, "learning_rate": 9.473031178486651e-06, "loss": 3.1986, "step": 2050 }, { "epoch": 0.6990456714383094, "grad_norm": 13.291188816696545, "learning_rate": 9.472144677592318e-06, "loss": 2.8296, "step": 2051 }, { "epoch": 0.6993865030674846, "grad_norm": 19.592809071272427, "learning_rate": 9.471257473211828e-06, "loss": 3.5864, "step": 2052 }, { "epoch": 0.6997273346966598, "grad_norm": 35.99881977867805, "learning_rate": 9.470369565484735e-06, "loss": 4.0762, "step": 2053 }, { "epoch": 0.700068166325835, "grad_norm": 21.474521619323784, "learning_rate": 9.469480954550715e-06, "loss": 3.3391, "step": 2054 }, { "epoch": 0.7004089979550102, "grad_norm": 15.742164821540676, "learning_rate": 9.468591640549552e-06, "loss": 3.2388, "step": 2055 }, { "epoch": 0.7007498295841854, "grad_norm": 15.691178093887359, "learning_rate": 9.467701623621133e-06, "loss": 2.8942, "step": 2056 }, { "epoch": 0.7010906612133606, "grad_norm": 17.38995279912389, "learning_rate": 9.466810903905463e-06, "loss": 3.2667, "step": 2057 }, { "epoch": 0.7014314928425358, "grad_norm": 15.68282722800537, "learning_rate": 9.465919481542658e-06, "loss": 3.3339, "step": 2058 }, { "epoch": 0.701772324471711, "grad_norm": 11.103329742117824, "learning_rate": 9.465027356672939e-06, "loss": 2.5684, "step": 2059 }, { "epoch": 0.7021131561008862, "grad_norm": 16.384328869947428, "learning_rate": 9.464134529436644e-06, "loss": 2.812, "step": 2060 }, { "epoch": 0.7024539877300614, "grad_norm": 16.950746080588612, "learning_rate": 9.463240999974216e-06, "loss": 2.8176, "step": 2061 }, { "epoch": 0.7027948193592365, "grad_norm": 20.389368117168754, "learning_rate": 9.462346768426211e-06, "loss": 3.7322, "step": 2062 }, { "epoch": 0.7031356509884117, "grad_norm": 24.55072741774868, "learning_rate": 9.461451834933297e-06, "loss": 3.6454, "step": 2063 }, { "epoch": 0.7034764826175869, "grad_norm": 13.8184814815036, "learning_rate": 9.460556199636249e-06, "loss": 3.0659, "step": 2064 }, { "epoch": 0.7038173142467621, "grad_norm": 14.674628169910356, "learning_rate": 9.459659862675953e-06, "loss": 3.0557, "step": 2065 }, { "epoch": 0.7041581458759373, "grad_norm": 23.094601615012294, "learning_rate": 9.45876282419341e-06, "loss": 3.2231, "step": 2066 }, { "epoch": 0.7044989775051125, "grad_norm": 12.421104573764032, "learning_rate": 9.457865084329725e-06, "loss": 3.1167, "step": 2067 }, { "epoch": 0.7048398091342877, "grad_norm": 13.500578859018319, "learning_rate": 9.456966643226115e-06, "loss": 2.9939, "step": 2068 }, { "epoch": 0.7051806407634629, "grad_norm": 15.545337927020801, "learning_rate": 9.45606750102391e-06, "loss": 3.318, "step": 2069 }, { "epoch": 0.7055214723926381, "grad_norm": 27.60818941510369, "learning_rate": 9.455167657864551e-06, "loss": 3.4039, "step": 2070 }, { "epoch": 0.7058623040218133, "grad_norm": 18.942082086841143, "learning_rate": 9.454267113889584e-06, "loss": 3.4197, "step": 2071 }, { "epoch": 0.7062031356509885, "grad_norm": 17.033486535274417, "learning_rate": 9.453365869240666e-06, "loss": 3.2668, "step": 2072 }, { "epoch": 0.7065439672801636, "grad_norm": 34.93472275254403, "learning_rate": 9.45246392405957e-06, "loss": 3.7864, "step": 2073 }, { "epoch": 0.7068847989093388, "grad_norm": 22.216679073537343, "learning_rate": 9.451561278488175e-06, "loss": 3.5603, "step": 2074 }, { "epoch": 0.707225630538514, "grad_norm": 16.80746069633037, "learning_rate": 9.45065793266847e-06, "loss": 3.5328, "step": 2075 }, { "epoch": 0.7075664621676891, "grad_norm": 18.102792793165516, "learning_rate": 9.449753886742555e-06, "loss": 3.992, "step": 2076 }, { "epoch": 0.7079072937968643, "grad_norm": 20.898992756954325, "learning_rate": 9.44884914085264e-06, "loss": 3.0073, "step": 2077 }, { "epoch": 0.7082481254260395, "grad_norm": 24.514984834994927, "learning_rate": 9.447943695141047e-06, "loss": 3.6462, "step": 2078 }, { "epoch": 0.7085889570552147, "grad_norm": 18.497925421788572, "learning_rate": 9.447037549750203e-06, "loss": 3.3344, "step": 2079 }, { "epoch": 0.7089297886843899, "grad_norm": 25.20486231040838, "learning_rate": 9.446130704822649e-06, "loss": 3.8382, "step": 2080 }, { "epoch": 0.7092706203135651, "grad_norm": 14.465797807478049, "learning_rate": 9.445223160501038e-06, "loss": 2.9517, "step": 2081 }, { "epoch": 0.7096114519427403, "grad_norm": 22.067345121809517, "learning_rate": 9.444314916928128e-06, "loss": 3.0826, "step": 2082 }, { "epoch": 0.7099522835719154, "grad_norm": 16.106941238533324, "learning_rate": 9.443405974246788e-06, "loss": 2.9081, "step": 2083 }, { "epoch": 0.7102931152010906, "grad_norm": 12.03865445507256, "learning_rate": 9.442496332600001e-06, "loss": 2.8307, "step": 2084 }, { "epoch": 0.7106339468302658, "grad_norm": 14.76250585182107, "learning_rate": 9.441585992130857e-06, "loss": 3.301, "step": 2085 }, { "epoch": 0.710974778459441, "grad_norm": 16.030253900005775, "learning_rate": 9.440674952982556e-06, "loss": 3.4222, "step": 2086 }, { "epoch": 0.7113156100886162, "grad_norm": 35.08646623456906, "learning_rate": 9.439763215298407e-06, "loss": 3.248, "step": 2087 }, { "epoch": 0.7116564417177914, "grad_norm": 25.16838158433103, "learning_rate": 9.43885077922183e-06, "loss": 3.4755, "step": 2088 }, { "epoch": 0.7119972733469666, "grad_norm": 12.953209130445995, "learning_rate": 9.437937644896355e-06, "loss": 3.3373, "step": 2089 }, { "epoch": 0.7123381049761418, "grad_norm": 16.211260306196767, "learning_rate": 9.437023812465623e-06, "loss": 3.1976, "step": 2090 }, { "epoch": 0.712678936605317, "grad_norm": 18.180820567999817, "learning_rate": 9.436109282073382e-06, "loss": 3.3859, "step": 2091 }, { "epoch": 0.7130197682344922, "grad_norm": 15.358429843890118, "learning_rate": 9.435194053863492e-06, "loss": 3.5194, "step": 2092 }, { "epoch": 0.7133605998636674, "grad_norm": 19.412466957159683, "learning_rate": 9.434278127979923e-06, "loss": 3.7945, "step": 2093 }, { "epoch": 0.7137014314928425, "grad_norm": 14.194751426133202, "learning_rate": 9.433361504566751e-06, "loss": 3.0173, "step": 2094 }, { "epoch": 0.7140422631220177, "grad_norm": 23.21945102886546, "learning_rate": 9.432444183768167e-06, "loss": 3.4271, "step": 2095 }, { "epoch": 0.7143830947511929, "grad_norm": 16.140404586063685, "learning_rate": 9.431526165728468e-06, "loss": 3.6293, "step": 2096 }, { "epoch": 0.7147239263803681, "grad_norm": 20.025224319496953, "learning_rate": 9.430607450592061e-06, "loss": 2.9187, "step": 2097 }, { "epoch": 0.7150647580095433, "grad_norm": 24.1230327551842, "learning_rate": 9.429688038503468e-06, "loss": 3.4119, "step": 2098 }, { "epoch": 0.7154055896387185, "grad_norm": 21.860560373167758, "learning_rate": 9.428767929607311e-06, "loss": 3.2569, "step": 2099 }, { "epoch": 0.7157464212678937, "grad_norm": 16.190933343041902, "learning_rate": 9.427847124048327e-06, "loss": 3.5765, "step": 2100 }, { "epoch": 0.7160872528970689, "grad_norm": 19.205727110643647, "learning_rate": 9.426925621971368e-06, "loss": 3.6002, "step": 2101 }, { "epoch": 0.7164280845262441, "grad_norm": 22.122931669002806, "learning_rate": 9.426003423521384e-06, "loss": 3.5051, "step": 2102 }, { "epoch": 0.7167689161554193, "grad_norm": 17.123984370704385, "learning_rate": 9.425080528843443e-06, "loss": 3.524, "step": 2103 }, { "epoch": 0.7171097477845945, "grad_norm": 16.745855273147797, "learning_rate": 9.424156938082718e-06, "loss": 3.5285, "step": 2104 }, { "epoch": 0.7174505794137696, "grad_norm": 14.516706419087384, "learning_rate": 9.423232651384497e-06, "loss": 3.2747, "step": 2105 }, { "epoch": 0.7177914110429447, "grad_norm": 14.246778416977962, "learning_rate": 9.42230766889417e-06, "loss": 3.4641, "step": 2106 }, { "epoch": 0.71813224267212, "grad_norm": 16.58095857306711, "learning_rate": 9.421381990757246e-06, "loss": 3.4627, "step": 2107 }, { "epoch": 0.7184730743012951, "grad_norm": 22.663404026901187, "learning_rate": 9.420455617119331e-06, "loss": 3.2897, "step": 2108 }, { "epoch": 0.7188139059304703, "grad_norm": 14.044393794286663, "learning_rate": 9.419528548126152e-06, "loss": 3.2347, "step": 2109 }, { "epoch": 0.7191547375596455, "grad_norm": 15.128951272227432, "learning_rate": 9.41860078392354e-06, "loss": 2.6867, "step": 2110 }, { "epoch": 0.7194955691888207, "grad_norm": 16.0410928385781, "learning_rate": 9.417672324657434e-06, "loss": 3.2987, "step": 2111 }, { "epoch": 0.7198364008179959, "grad_norm": 33.89639004371312, "learning_rate": 9.416743170473885e-06, "loss": 3.1659, "step": 2112 }, { "epoch": 0.7201772324471711, "grad_norm": 18.634352909516263, "learning_rate": 9.415813321519054e-06, "loss": 3.6293, "step": 2113 }, { "epoch": 0.7205180640763463, "grad_norm": 15.035383910742356, "learning_rate": 9.41488277793921e-06, "loss": 3.1384, "step": 2114 }, { "epoch": 0.7208588957055214, "grad_norm": 20.508804092048123, "learning_rate": 9.413951539880731e-06, "loss": 3.5859, "step": 2115 }, { "epoch": 0.7211997273346966, "grad_norm": 15.80505551681569, "learning_rate": 9.413019607490102e-06, "loss": 3.6468, "step": 2116 }, { "epoch": 0.7215405589638718, "grad_norm": 29.067964705869628, "learning_rate": 9.412086980913922e-06, "loss": 3.2086, "step": 2117 }, { "epoch": 0.721881390593047, "grad_norm": 11.0571044334001, "learning_rate": 9.411153660298897e-06, "loss": 2.7291, "step": 2118 }, { "epoch": 0.7222222222222222, "grad_norm": 14.143067431664138, "learning_rate": 9.410219645791841e-06, "loss": 2.5099, "step": 2119 }, { "epoch": 0.7225630538513974, "grad_norm": 13.553828825464832, "learning_rate": 9.40928493753968e-06, "loss": 2.8003, "step": 2120 }, { "epoch": 0.7229038854805726, "grad_norm": 22.798779517546993, "learning_rate": 9.408349535689443e-06, "loss": 4.0223, "step": 2121 }, { "epoch": 0.7232447171097478, "grad_norm": 29.04272144834151, "learning_rate": 9.407413440388277e-06, "loss": 4.0865, "step": 2122 }, { "epoch": 0.723585548738923, "grad_norm": 21.708388757850162, "learning_rate": 9.406476651783433e-06, "loss": 3.5193, "step": 2123 }, { "epoch": 0.7239263803680982, "grad_norm": 16.543317884920462, "learning_rate": 9.405539170022269e-06, "loss": 3.3338, "step": 2124 }, { "epoch": 0.7242672119972734, "grad_norm": 11.630971341563763, "learning_rate": 9.404600995252256e-06, "loss": 3.0225, "step": 2125 }, { "epoch": 0.7246080436264485, "grad_norm": 58.67817623220733, "learning_rate": 9.403662127620974e-06, "loss": 2.3736, "step": 2126 }, { "epoch": 0.7249488752556237, "grad_norm": 20.675348292327357, "learning_rate": 9.402722567276107e-06, "loss": 3.2509, "step": 2127 }, { "epoch": 0.7252897068847989, "grad_norm": 20.1739415533614, "learning_rate": 9.401782314365458e-06, "loss": 3.3504, "step": 2128 }, { "epoch": 0.7256305385139741, "grad_norm": 12.62005522688663, "learning_rate": 9.400841369036926e-06, "loss": 2.8552, "step": 2129 }, { "epoch": 0.7259713701431493, "grad_norm": 17.015086628492835, "learning_rate": 9.399899731438527e-06, "loss": 2.9039, "step": 2130 }, { "epoch": 0.7263122017723245, "grad_norm": 26.28232266523384, "learning_rate": 9.398957401718387e-06, "loss": 3.2005, "step": 2131 }, { "epoch": 0.7266530334014997, "grad_norm": 19.00622615223013, "learning_rate": 9.398014380024736e-06, "loss": 3.3896, "step": 2132 }, { "epoch": 0.7269938650306749, "grad_norm": 33.76365152296656, "learning_rate": 9.397070666505914e-06, "loss": 3.1952, "step": 2133 }, { "epoch": 0.7273346966598501, "grad_norm": 17.865703123998077, "learning_rate": 9.396126261310375e-06, "loss": 3.2917, "step": 2134 }, { "epoch": 0.7276755282890253, "grad_norm": 16.878853737888385, "learning_rate": 9.395181164586675e-06, "loss": 3.4298, "step": 2135 }, { "epoch": 0.7280163599182005, "grad_norm": 19.229647258773202, "learning_rate": 9.39423537648348e-06, "loss": 3.714, "step": 2136 }, { "epoch": 0.7283571915473755, "grad_norm": 11.629717725532092, "learning_rate": 9.393288897149568e-06, "loss": 2.9651, "step": 2137 }, { "epoch": 0.7286980231765507, "grad_norm": 14.842689281654332, "learning_rate": 9.392341726733825e-06, "loss": 3.2475, "step": 2138 }, { "epoch": 0.7290388548057259, "grad_norm": 11.597251862638945, "learning_rate": 9.391393865385241e-06, "loss": 2.4716, "step": 2139 }, { "epoch": 0.7293796864349011, "grad_norm": 14.773770711380942, "learning_rate": 9.390445313252923e-06, "loss": 2.6929, "step": 2140 }, { "epoch": 0.7297205180640763, "grad_norm": 13.070233071968197, "learning_rate": 9.38949607048608e-06, "loss": 2.8606, "step": 2141 }, { "epoch": 0.7300613496932515, "grad_norm": 18.715139905950707, "learning_rate": 9.388546137234031e-06, "loss": 3.9204, "step": 2142 }, { "epoch": 0.7304021813224267, "grad_norm": 30.91185338467825, "learning_rate": 9.387595513646203e-06, "loss": 3.5411, "step": 2143 }, { "epoch": 0.7307430129516019, "grad_norm": 22.35001476303035, "learning_rate": 9.386644199872136e-06, "loss": 3.7739, "step": 2144 }, { "epoch": 0.7310838445807771, "grad_norm": 11.360516617175431, "learning_rate": 9.385692196061475e-06, "loss": 2.8393, "step": 2145 }, { "epoch": 0.7314246762099523, "grad_norm": 18.10681642479518, "learning_rate": 9.384739502363974e-06, "loss": 3.9415, "step": 2146 }, { "epoch": 0.7317655078391274, "grad_norm": 19.724109530396593, "learning_rate": 9.383786118929494e-06, "loss": 2.909, "step": 2147 }, { "epoch": 0.7321063394683026, "grad_norm": 15.763733893898985, "learning_rate": 9.382832045908005e-06, "loss": 3.5376, "step": 2148 }, { "epoch": 0.7324471710974778, "grad_norm": 16.484036594375144, "learning_rate": 9.38187728344959e-06, "loss": 3.1311, "step": 2149 }, { "epoch": 0.732788002726653, "grad_norm": 13.664964196419726, "learning_rate": 9.380921831704432e-06, "loss": 2.818, "step": 2150 }, { "epoch": 0.7331288343558282, "grad_norm": 21.79716829037097, "learning_rate": 9.379965690822834e-06, "loss": 2.9985, "step": 2151 }, { "epoch": 0.7334696659850034, "grad_norm": 16.84405114087349, "learning_rate": 9.379008860955197e-06, "loss": 3.4575, "step": 2152 }, { "epoch": 0.7338104976141786, "grad_norm": 20.297002063945683, "learning_rate": 9.378051342252033e-06, "loss": 3.0723, "step": 2153 }, { "epoch": 0.7341513292433538, "grad_norm": 15.229877054371403, "learning_rate": 9.377093134863964e-06, "loss": 3.5116, "step": 2154 }, { "epoch": 0.734492160872529, "grad_norm": 20.102329987105268, "learning_rate": 9.376134238941721e-06, "loss": 3.7979, "step": 2155 }, { "epoch": 0.7348329925017042, "grad_norm": 18.069566736179308, "learning_rate": 9.375174654636144e-06, "loss": 3.0663, "step": 2156 }, { "epoch": 0.7351738241308794, "grad_norm": 14.29597169236135, "learning_rate": 9.374214382098174e-06, "loss": 2.9961, "step": 2157 }, { "epoch": 0.7355146557600545, "grad_norm": 14.728883341795942, "learning_rate": 9.373253421478872e-06, "loss": 3.4193, "step": 2158 }, { "epoch": 0.7358554873892297, "grad_norm": 17.730710005851144, "learning_rate": 9.372291772929396e-06, "loss": 3.4843, "step": 2159 }, { "epoch": 0.7361963190184049, "grad_norm": 18.437335202751587, "learning_rate": 9.37132943660102e-06, "loss": 3.595, "step": 2160 }, { "epoch": 0.7365371506475801, "grad_norm": 19.340814698089993, "learning_rate": 9.370366412645122e-06, "loss": 3.5238, "step": 2161 }, { "epoch": 0.7368779822767553, "grad_norm": 12.794623594348831, "learning_rate": 9.36940270121319e-06, "loss": 2.9988, "step": 2162 }, { "epoch": 0.7372188139059305, "grad_norm": 42.404861330438266, "learning_rate": 9.36843830245682e-06, "loss": 3.2262, "step": 2163 }, { "epoch": 0.7375596455351057, "grad_norm": 13.372116795211642, "learning_rate": 9.367473216527714e-06, "loss": 3.1717, "step": 2164 }, { "epoch": 0.7379004771642809, "grad_norm": 18.502127879020858, "learning_rate": 9.366507443577687e-06, "loss": 3.515, "step": 2165 }, { "epoch": 0.7382413087934561, "grad_norm": 14.354791922216167, "learning_rate": 9.365540983758654e-06, "loss": 3.5344, "step": 2166 }, { "epoch": 0.7385821404226313, "grad_norm": 21.054251619549888, "learning_rate": 9.364573837222648e-06, "loss": 3.5727, "step": 2167 }, { "epoch": 0.7389229720518065, "grad_norm": 16.538471482234247, "learning_rate": 9.363606004121804e-06, "loss": 3.5327, "step": 2168 }, { "epoch": 0.7392638036809815, "grad_norm": 10.214764307764867, "learning_rate": 9.362637484608363e-06, "loss": 2.9283, "step": 2169 }, { "epoch": 0.7396046353101567, "grad_norm": 18.343435111081916, "learning_rate": 9.36166827883468e-06, "loss": 3.6453, "step": 2170 }, { "epoch": 0.7399454669393319, "grad_norm": 19.740084996092282, "learning_rate": 9.360698386953215e-06, "loss": 3.9665, "step": 2171 }, { "epoch": 0.7402862985685071, "grad_norm": 18.9907173073652, "learning_rate": 9.359727809116534e-06, "loss": 3.9118, "step": 2172 }, { "epoch": 0.7406271301976823, "grad_norm": 20.877752836602724, "learning_rate": 9.358756545477313e-06, "loss": 3.0459, "step": 2173 }, { "epoch": 0.7409679618268575, "grad_norm": 12.788947158766721, "learning_rate": 9.357784596188336e-06, "loss": 3.0421, "step": 2174 }, { "epoch": 0.7413087934560327, "grad_norm": 15.545949472715442, "learning_rate": 9.356811961402495e-06, "loss": 3.0669, "step": 2175 }, { "epoch": 0.7416496250852079, "grad_norm": 17.753827684355173, "learning_rate": 9.35583864127279e-06, "loss": 3.555, "step": 2176 }, { "epoch": 0.7419904567143831, "grad_norm": 18.625258748317222, "learning_rate": 9.354864635952326e-06, "loss": 3.8347, "step": 2177 }, { "epoch": 0.7423312883435583, "grad_norm": 17.975737396510112, "learning_rate": 9.35388994559432e-06, "loss": 3.7061, "step": 2178 }, { "epoch": 0.7426721199727335, "grad_norm": 12.675949702293165, "learning_rate": 9.352914570352091e-06, "loss": 2.6705, "step": 2179 }, { "epoch": 0.7430129516019086, "grad_norm": 19.815875738318564, "learning_rate": 9.351938510379076e-06, "loss": 3.6813, "step": 2180 }, { "epoch": 0.7433537832310838, "grad_norm": 16.588717999824787, "learning_rate": 9.350961765828807e-06, "loss": 3.6362, "step": 2181 }, { "epoch": 0.743694614860259, "grad_norm": 26.16639037155661, "learning_rate": 9.349984336854933e-06, "loss": 3.2156, "step": 2182 }, { "epoch": 0.7440354464894342, "grad_norm": 15.851113494129976, "learning_rate": 9.349006223611205e-06, "loss": 3.4348, "step": 2183 }, { "epoch": 0.7443762781186094, "grad_norm": 20.597915214000203, "learning_rate": 9.348027426251487e-06, "loss": 3.2739, "step": 2184 }, { "epoch": 0.7447171097477846, "grad_norm": 13.405937403467084, "learning_rate": 9.347047944929746e-06, "loss": 3.2374, "step": 2185 }, { "epoch": 0.7450579413769598, "grad_norm": 22.159732733387383, "learning_rate": 9.34606777980006e-06, "loss": 3.0936, "step": 2186 }, { "epoch": 0.745398773006135, "grad_norm": 16.58468274811247, "learning_rate": 9.34508693101661e-06, "loss": 3.3175, "step": 2187 }, { "epoch": 0.7457396046353102, "grad_norm": 17.542675982933208, "learning_rate": 9.34410539873369e-06, "loss": 2.8963, "step": 2188 }, { "epoch": 0.7460804362644854, "grad_norm": 34.37201658514218, "learning_rate": 9.343123183105696e-06, "loss": 3.0524, "step": 2189 }, { "epoch": 0.7464212678936605, "grad_norm": 17.002253441097135, "learning_rate": 9.342140284287138e-06, "loss": 3.3892, "step": 2190 }, { "epoch": 0.7467620995228357, "grad_norm": 17.641226622567473, "learning_rate": 9.341156702432628e-06, "loss": 3.2909, "step": 2191 }, { "epoch": 0.7471029311520109, "grad_norm": 12.917285103621877, "learning_rate": 9.340172437696887e-06, "loss": 3.1064, "step": 2192 }, { "epoch": 0.7474437627811861, "grad_norm": 18.73676190115936, "learning_rate": 9.339187490234745e-06, "loss": 3.3704, "step": 2193 }, { "epoch": 0.7477845944103613, "grad_norm": 17.494518310140794, "learning_rate": 9.338201860201136e-06, "loss": 4.096, "step": 2194 }, { "epoch": 0.7481254260395365, "grad_norm": 19.215726206872105, "learning_rate": 9.337215547751106e-06, "loss": 3.9718, "step": 2195 }, { "epoch": 0.7484662576687117, "grad_norm": 17.81620884597207, "learning_rate": 9.336228553039806e-06, "loss": 3.4317, "step": 2196 }, { "epoch": 0.7488070892978869, "grad_norm": 22.984282987381416, "learning_rate": 9.335240876222491e-06, "loss": 3.8808, "step": 2197 }, { "epoch": 0.7491479209270621, "grad_norm": 17.258399298271794, "learning_rate": 9.33425251745453e-06, "loss": 3.6919, "step": 2198 }, { "epoch": 0.7494887525562373, "grad_norm": 20.928123680354293, "learning_rate": 9.333263476891394e-06, "loss": 3.5518, "step": 2199 }, { "epoch": 0.7498295841854125, "grad_norm": 11.587359875322278, "learning_rate": 9.332273754688663e-06, "loss": 2.8778, "step": 2200 }, { "epoch": 0.7501704158145875, "grad_norm": 15.197307259098533, "learning_rate": 9.331283351002025e-06, "loss": 3.2932, "step": 2201 }, { "epoch": 0.7505112474437627, "grad_norm": 19.404660170563698, "learning_rate": 9.330292265987272e-06, "loss": 3.4958, "step": 2202 }, { "epoch": 0.7508520790729379, "grad_norm": 17.634293740189626, "learning_rate": 9.32930049980031e-06, "loss": 3.647, "step": 2203 }, { "epoch": 0.7511929107021131, "grad_norm": 13.922820779239487, "learning_rate": 9.328308052597146e-06, "loss": 2.8803, "step": 2204 }, { "epoch": 0.7515337423312883, "grad_norm": 17.900230465099092, "learning_rate": 9.327314924533895e-06, "loss": 3.6653, "step": 2205 }, { "epoch": 0.7518745739604635, "grad_norm": 19.71394320222073, "learning_rate": 9.32632111576678e-06, "loss": 3.3984, "step": 2206 }, { "epoch": 0.7522154055896387, "grad_norm": 15.041103905115955, "learning_rate": 9.325326626452132e-06, "loss": 3.8555, "step": 2207 }, { "epoch": 0.7525562372188139, "grad_norm": 22.596771660900377, "learning_rate": 9.324331456746386e-06, "loss": 4.0586, "step": 2208 }, { "epoch": 0.7528970688479891, "grad_norm": 12.474683116156665, "learning_rate": 9.323335606806087e-06, "loss": 3.2313, "step": 2209 }, { "epoch": 0.7532379004771643, "grad_norm": 12.748036023873059, "learning_rate": 9.32233907678789e-06, "loss": 3.0374, "step": 2210 }, { "epoch": 0.7535787321063395, "grad_norm": 20.779819095959084, "learning_rate": 9.321341866848548e-06, "loss": 3.5759, "step": 2211 }, { "epoch": 0.7539195637355146, "grad_norm": 16.760112147520704, "learning_rate": 9.320343977144928e-06, "loss": 3.6889, "step": 2212 }, { "epoch": 0.7542603953646898, "grad_norm": 19.769407717086892, "learning_rate": 9.319345407834001e-06, "loss": 3.4033, "step": 2213 }, { "epoch": 0.754601226993865, "grad_norm": 19.05678859880588, "learning_rate": 9.318346159072848e-06, "loss": 3.6113, "step": 2214 }, { "epoch": 0.7549420586230402, "grad_norm": 15.115377524683467, "learning_rate": 9.317346231018655e-06, "loss": 3.6396, "step": 2215 }, { "epoch": 0.7552828902522154, "grad_norm": 24.795231138669546, "learning_rate": 9.316345623828713e-06, "loss": 3.5372, "step": 2216 }, { "epoch": 0.7556237218813906, "grad_norm": 15.181193246515999, "learning_rate": 9.315344337660422e-06, "loss": 3.4035, "step": 2217 }, { "epoch": 0.7559645535105658, "grad_norm": 14.485168184672116, "learning_rate": 9.314342372671288e-06, "loss": 2.999, "step": 2218 }, { "epoch": 0.756305385139741, "grad_norm": 12.444307070806353, "learning_rate": 9.313339729018924e-06, "loss": 3.0643, "step": 2219 }, { "epoch": 0.7566462167689162, "grad_norm": 15.109182172426845, "learning_rate": 9.31233640686105e-06, "loss": 3.2432, "step": 2220 }, { "epoch": 0.7569870483980914, "grad_norm": 17.028611250437553, "learning_rate": 9.311332406355494e-06, "loss": 3.0913, "step": 2221 }, { "epoch": 0.7573278800272665, "grad_norm": 23.970153837870257, "learning_rate": 9.310327727660186e-06, "loss": 3.0553, "step": 2222 }, { "epoch": 0.7576687116564417, "grad_norm": 17.589589454302235, "learning_rate": 9.30932237093317e-06, "loss": 3.7805, "step": 2223 }, { "epoch": 0.7580095432856169, "grad_norm": 18.2585801475019, "learning_rate": 9.30831633633259e-06, "loss": 3.5395, "step": 2224 }, { "epoch": 0.7583503749147921, "grad_norm": 15.04097606023296, "learning_rate": 9.3073096240167e-06, "loss": 3.2287, "step": 2225 }, { "epoch": 0.7586912065439673, "grad_norm": 18.18052016031695, "learning_rate": 9.30630223414386e-06, "loss": 3.7671, "step": 2226 }, { "epoch": 0.7590320381731425, "grad_norm": 24.405177831804576, "learning_rate": 9.305294166872535e-06, "loss": 3.2137, "step": 2227 }, { "epoch": 0.7593728698023177, "grad_norm": 19.793330052440826, "learning_rate": 9.304285422361301e-06, "loss": 3.4576, "step": 2228 }, { "epoch": 0.7597137014314929, "grad_norm": 32.18943112293496, "learning_rate": 9.303276000768835e-06, "loss": 2.5832, "step": 2229 }, { "epoch": 0.7600545330606681, "grad_norm": 17.002338113604342, "learning_rate": 9.302265902253924e-06, "loss": 3.4641, "step": 2230 }, { "epoch": 0.7603953646898433, "grad_norm": 16.389134179340285, "learning_rate": 9.301255126975459e-06, "loss": 2.9225, "step": 2231 }, { "epoch": 0.7607361963190185, "grad_norm": 9.962315650258061, "learning_rate": 9.30024367509244e-06, "loss": 2.5274, "step": 2232 }, { "epoch": 0.7610770279481935, "grad_norm": 20.34876926769682, "learning_rate": 9.299231546763974e-06, "loss": 3.3728, "step": 2233 }, { "epoch": 0.7614178595773687, "grad_norm": 18.919413738017173, "learning_rate": 9.298218742149272e-06, "loss": 3.5811, "step": 2234 }, { "epoch": 0.7617586912065439, "grad_norm": 13.273083493935724, "learning_rate": 9.29720526140765e-06, "loss": 3.3602, "step": 2235 }, { "epoch": 0.7620995228357191, "grad_norm": 20.80465445159649, "learning_rate": 9.296191104698535e-06, "loss": 3.0763, "step": 2236 }, { "epoch": 0.7624403544648943, "grad_norm": 12.382199728235753, "learning_rate": 9.295176272181457e-06, "loss": 2.779, "step": 2237 }, { "epoch": 0.7627811860940695, "grad_norm": 33.94568656681278, "learning_rate": 9.294160764016053e-06, "loss": 3.6467, "step": 2238 }, { "epoch": 0.7631220177232447, "grad_norm": 14.433892941231413, "learning_rate": 9.293144580362067e-06, "loss": 2.5348, "step": 2239 }, { "epoch": 0.7634628493524199, "grad_norm": 18.577871939260245, "learning_rate": 9.292127721379348e-06, "loss": 3.5378, "step": 2240 }, { "epoch": 0.7638036809815951, "grad_norm": 19.160629364415474, "learning_rate": 9.291110187227853e-06, "loss": 3.8237, "step": 2241 }, { "epoch": 0.7641445126107703, "grad_norm": 17.76211823134256, "learning_rate": 9.290091978067643e-06, "loss": 3.7033, "step": 2242 }, { "epoch": 0.7644853442399455, "grad_norm": 20.019362488023077, "learning_rate": 9.289073094058889e-06, "loss": 3.0504, "step": 2243 }, { "epoch": 0.7648261758691206, "grad_norm": 16.212936357246484, "learning_rate": 9.28805353536186e-06, "loss": 3.5597, "step": 2244 }, { "epoch": 0.7651670074982958, "grad_norm": 19.19924710019374, "learning_rate": 9.287033302136943e-06, "loss": 3.4762, "step": 2245 }, { "epoch": 0.765507839127471, "grad_norm": 17.79562431377502, "learning_rate": 9.286012394544621e-06, "loss": 3.2457, "step": 2246 }, { "epoch": 0.7658486707566462, "grad_norm": 13.969697135843267, "learning_rate": 9.284990812745488e-06, "loss": 3.4271, "step": 2247 }, { "epoch": 0.7661895023858214, "grad_norm": 26.857724555096212, "learning_rate": 9.283968556900243e-06, "loss": 4.2087, "step": 2248 }, { "epoch": 0.7665303340149966, "grad_norm": 19.95592626278951, "learning_rate": 9.282945627169688e-06, "loss": 3.3688, "step": 2249 }, { "epoch": 0.7668711656441718, "grad_norm": 19.80020052981005, "learning_rate": 9.281922023714739e-06, "loss": 3.4484, "step": 2250 }, { "epoch": 0.767211997273347, "grad_norm": 13.9565209775081, "learning_rate": 9.280897746696409e-06, "loss": 2.9656, "step": 2251 }, { "epoch": 0.7675528289025222, "grad_norm": 15.959063701628548, "learning_rate": 9.279872796275823e-06, "loss": 3.312, "step": 2252 }, { "epoch": 0.7678936605316974, "grad_norm": 16.45381165903346, "learning_rate": 9.278847172614209e-06, "loss": 2.5325, "step": 2253 }, { "epoch": 0.7682344921608726, "grad_norm": 17.079219372645227, "learning_rate": 9.277820875872902e-06, "loss": 3.3, "step": 2254 }, { "epoch": 0.7685753237900477, "grad_norm": 20.111078753324165, "learning_rate": 9.276793906213342e-06, "loss": 3.2207, "step": 2255 }, { "epoch": 0.7689161554192229, "grad_norm": 20.44805689414811, "learning_rate": 9.275766263797075e-06, "loss": 2.8747, "step": 2256 }, { "epoch": 0.7692569870483981, "grad_norm": 16.534426584559167, "learning_rate": 9.274737948785752e-06, "loss": 3.3387, "step": 2257 }, { "epoch": 0.7695978186775733, "grad_norm": 11.455323245489279, "learning_rate": 9.273708961341137e-06, "loss": 2.8507, "step": 2258 }, { "epoch": 0.7699386503067485, "grad_norm": 14.9687767387331, "learning_rate": 9.272679301625088e-06, "loss": 2.9025, "step": 2259 }, { "epoch": 0.7702794819359237, "grad_norm": 17.259326533473462, "learning_rate": 9.271648969799574e-06, "loss": 2.9936, "step": 2260 }, { "epoch": 0.7706203135650989, "grad_norm": 20.58388894259241, "learning_rate": 9.270617966026676e-06, "loss": 3.06, "step": 2261 }, { "epoch": 0.7709611451942741, "grad_norm": 33.62407169145876, "learning_rate": 9.26958629046857e-06, "loss": 3.6448, "step": 2262 }, { "epoch": 0.7713019768234493, "grad_norm": 14.647001225837155, "learning_rate": 9.268553943287543e-06, "loss": 3.7968, "step": 2263 }, { "epoch": 0.7716428084526245, "grad_norm": 13.280144277351141, "learning_rate": 9.26752092464599e-06, "loss": 2.9437, "step": 2264 }, { "epoch": 0.7719836400817995, "grad_norm": 32.682413360006294, "learning_rate": 9.266487234706409e-06, "loss": 3.3698, "step": 2265 }, { "epoch": 0.7723244717109747, "grad_norm": 19.177515467381305, "learning_rate": 9.2654528736314e-06, "loss": 2.7436, "step": 2266 }, { "epoch": 0.7726653033401499, "grad_norm": 18.988050292988977, "learning_rate": 9.264417841583675e-06, "loss": 3.0641, "step": 2267 }, { "epoch": 0.7730061349693251, "grad_norm": 18.88222654804129, "learning_rate": 9.263382138726047e-06, "loss": 3.5783, "step": 2268 }, { "epoch": 0.7733469665985003, "grad_norm": 13.992875934243239, "learning_rate": 9.262345765221439e-06, "loss": 2.7541, "step": 2269 }, { "epoch": 0.7736877982276755, "grad_norm": 18.625825705576826, "learning_rate": 9.261308721232872e-06, "loss": 2.7369, "step": 2270 }, { "epoch": 0.7740286298568507, "grad_norm": 13.071364974931148, "learning_rate": 9.260271006923482e-06, "loss": 3.6027, "step": 2271 }, { "epoch": 0.7743694614860259, "grad_norm": 13.557491615555827, "learning_rate": 9.259232622456502e-06, "loss": 3.0037, "step": 2272 }, { "epoch": 0.7747102931152011, "grad_norm": 12.792879055578162, "learning_rate": 9.258193567995278e-06, "loss": 2.8715, "step": 2273 }, { "epoch": 0.7750511247443763, "grad_norm": 16.121132129924348, "learning_rate": 9.257153843703252e-06, "loss": 3.534, "step": 2274 }, { "epoch": 0.7753919563735515, "grad_norm": 29.461863476616383, "learning_rate": 9.256113449743981e-06, "loss": 3.0347, "step": 2275 }, { "epoch": 0.7757327880027266, "grad_norm": 20.16592517956135, "learning_rate": 9.255072386281121e-06, "loss": 3.4961, "step": 2276 }, { "epoch": 0.7760736196319018, "grad_norm": 22.014447477498507, "learning_rate": 9.254030653478436e-06, "loss": 3.1273, "step": 2277 }, { "epoch": 0.776414451261077, "grad_norm": 11.548066637272779, "learning_rate": 9.252988251499796e-06, "loss": 3.2111, "step": 2278 }, { "epoch": 0.7767552828902522, "grad_norm": 19.331526231968937, "learning_rate": 9.251945180509174e-06, "loss": 3.5842, "step": 2279 }, { "epoch": 0.7770961145194274, "grad_norm": 20.734471308717293, "learning_rate": 9.250901440670648e-06, "loss": 3.4387, "step": 2280 }, { "epoch": 0.7774369461486026, "grad_norm": 17.316072615428816, "learning_rate": 9.249857032148404e-06, "loss": 3.248, "step": 2281 }, { "epoch": 0.7777777777777778, "grad_norm": 13.855899764125656, "learning_rate": 9.248811955106732e-06, "loss": 3.5959, "step": 2282 }, { "epoch": 0.778118609406953, "grad_norm": 13.943943066000202, "learning_rate": 9.247766209710025e-06, "loss": 3.2347, "step": 2283 }, { "epoch": 0.7784594410361282, "grad_norm": 17.094030818433183, "learning_rate": 9.246719796122784e-06, "loss": 2.584, "step": 2284 }, { "epoch": 0.7788002726653034, "grad_norm": 22.135702305649247, "learning_rate": 9.245672714509613e-06, "loss": 3.5853, "step": 2285 }, { "epoch": 0.7791411042944786, "grad_norm": 13.344222546600374, "learning_rate": 9.244624965035223e-06, "loss": 3.4417, "step": 2286 }, { "epoch": 0.7794819359236537, "grad_norm": 17.669070849785857, "learning_rate": 9.24357654786443e-06, "loss": 3.3253, "step": 2287 }, { "epoch": 0.7798227675528289, "grad_norm": 14.474884755513854, "learning_rate": 9.242527463162155e-06, "loss": 3.34, "step": 2288 }, { "epoch": 0.7801635991820041, "grad_norm": 15.975704191024015, "learning_rate": 9.241477711093419e-06, "loss": 3.0057, "step": 2289 }, { "epoch": 0.7805044308111793, "grad_norm": 16.407186555053222, "learning_rate": 9.240427291823355e-06, "loss": 3.1972, "step": 2290 }, { "epoch": 0.7808452624403545, "grad_norm": 21.231020638901168, "learning_rate": 9.239376205517197e-06, "loss": 2.9554, "step": 2291 }, { "epoch": 0.7811860940695297, "grad_norm": 36.65695877619637, "learning_rate": 9.238324452340288e-06, "loss": 3.3557, "step": 2292 }, { "epoch": 0.7815269256987049, "grad_norm": 13.171241082042586, "learning_rate": 9.23727203245807e-06, "loss": 3.4168, "step": 2293 }, { "epoch": 0.78186775732788, "grad_norm": 13.201834507029362, "learning_rate": 9.236218946036093e-06, "loss": 2.9598, "step": 2294 }, { "epoch": 0.7822085889570553, "grad_norm": 27.226213326908056, "learning_rate": 9.235165193240015e-06, "loss": 3.6304, "step": 2295 }, { "epoch": 0.7825494205862304, "grad_norm": 17.431740106405698, "learning_rate": 9.23411077423559e-06, "loss": 3.5265, "step": 2296 }, { "epoch": 0.7828902522154055, "grad_norm": 16.44150270163108, "learning_rate": 9.233055689188686e-06, "loss": 3.423, "step": 2297 }, { "epoch": 0.7832310838445807, "grad_norm": 13.368735408602225, "learning_rate": 9.23199993826527e-06, "loss": 2.9745, "step": 2298 }, { "epoch": 0.7835719154737559, "grad_norm": 18.170126862758586, "learning_rate": 9.23094352163142e-06, "loss": 3.5841, "step": 2299 }, { "epoch": 0.7839127471029311, "grad_norm": 15.156829936723492, "learning_rate": 9.22988643945331e-06, "loss": 3.5537, "step": 2300 }, { "epoch": 0.7842535787321063, "grad_norm": 13.324848579705975, "learning_rate": 9.228828691897225e-06, "loss": 3.2456, "step": 2301 }, { "epoch": 0.7845944103612815, "grad_norm": 18.963853485920126, "learning_rate": 9.227770279129553e-06, "loss": 2.8456, "step": 2302 }, { "epoch": 0.7849352419904567, "grad_norm": 17.291228983034287, "learning_rate": 9.226711201316785e-06, "loss": 3.1616, "step": 2303 }, { "epoch": 0.7852760736196319, "grad_norm": 15.466287070573062, "learning_rate": 9.225651458625521e-06, "loss": 3.4873, "step": 2304 }, { "epoch": 0.7856169052488071, "grad_norm": 22.590751333665924, "learning_rate": 9.224591051222461e-06, "loss": 3.6783, "step": 2305 }, { "epoch": 0.7859577368779823, "grad_norm": 18.223262321042696, "learning_rate": 9.223529979274411e-06, "loss": 3.6829, "step": 2306 }, { "epoch": 0.7862985685071575, "grad_norm": 15.83825828874748, "learning_rate": 9.222468242948283e-06, "loss": 3.5601, "step": 2307 }, { "epoch": 0.7866394001363326, "grad_norm": 29.841818000501487, "learning_rate": 9.221405842411091e-06, "loss": 3.3108, "step": 2308 }, { "epoch": 0.7869802317655078, "grad_norm": 17.35144049008841, "learning_rate": 9.220342777829956e-06, "loss": 3.6368, "step": 2309 }, { "epoch": 0.787321063394683, "grad_norm": 27.032709484313855, "learning_rate": 9.219279049372101e-06, "loss": 3.2408, "step": 2310 }, { "epoch": 0.7876618950238582, "grad_norm": 15.38084381091063, "learning_rate": 9.218214657204857e-06, "loss": 2.9746, "step": 2311 }, { "epoch": 0.7880027266530334, "grad_norm": 16.244742832934143, "learning_rate": 9.217149601495654e-06, "loss": 3.0045, "step": 2312 }, { "epoch": 0.7883435582822086, "grad_norm": 16.832014690142568, "learning_rate": 9.216083882412033e-06, "loss": 3.2433, "step": 2313 }, { "epoch": 0.7886843899113838, "grad_norm": 14.8472917092808, "learning_rate": 9.215017500121632e-06, "loss": 3.5616, "step": 2314 }, { "epoch": 0.789025221540559, "grad_norm": 17.118380770971644, "learning_rate": 9.213950454792201e-06, "loss": 3.2948, "step": 2315 }, { "epoch": 0.7893660531697342, "grad_norm": 17.530113356950764, "learning_rate": 9.212882746591588e-06, "loss": 3.4175, "step": 2316 }, { "epoch": 0.7897068847989094, "grad_norm": 14.021223425700859, "learning_rate": 9.211814375687749e-06, "loss": 3.6983, "step": 2317 }, { "epoch": 0.7900477164280846, "grad_norm": 16.93241062662418, "learning_rate": 9.21074534224874e-06, "loss": 2.6702, "step": 2318 }, { "epoch": 0.7903885480572597, "grad_norm": 22.78694448352502, "learning_rate": 9.20967564644273e-06, "loss": 3.2993, "step": 2319 }, { "epoch": 0.7907293796864349, "grad_norm": 18.374178686965557, "learning_rate": 9.208605288437982e-06, "loss": 2.9033, "step": 2320 }, { "epoch": 0.7910702113156101, "grad_norm": 25.398005580063025, "learning_rate": 9.20753426840287e-06, "loss": 3.1823, "step": 2321 }, { "epoch": 0.7914110429447853, "grad_norm": 16.978090493951253, "learning_rate": 9.206462586505865e-06, "loss": 2.8169, "step": 2322 }, { "epoch": 0.7917518745739605, "grad_norm": 13.89558506091015, "learning_rate": 9.205390242915554e-06, "loss": 3.4932, "step": 2323 }, { "epoch": 0.7920927062031357, "grad_norm": 21.222777654744895, "learning_rate": 9.204317237800613e-06, "loss": 2.5054, "step": 2324 }, { "epoch": 0.7924335378323109, "grad_norm": 16.416770309343, "learning_rate": 9.203243571329836e-06, "loss": 4.0085, "step": 2325 }, { "epoch": 0.792774369461486, "grad_norm": 21.618829778549372, "learning_rate": 9.202169243672114e-06, "loss": 3.9931, "step": 2326 }, { "epoch": 0.7931152010906612, "grad_norm": 20.20824907964005, "learning_rate": 9.20109425499644e-06, "loss": 3.1761, "step": 2327 }, { "epoch": 0.7934560327198364, "grad_norm": 15.232954643706833, "learning_rate": 9.200018605471918e-06, "loss": 3.3236, "step": 2328 }, { "epoch": 0.7937968643490115, "grad_norm": 18.00184780641661, "learning_rate": 9.19894229526775e-06, "loss": 3.3332, "step": 2329 }, { "epoch": 0.7941376959781867, "grad_norm": 17.81055633299913, "learning_rate": 9.197865324553243e-06, "loss": 3.7239, "step": 2330 }, { "epoch": 0.7944785276073619, "grad_norm": 12.970887142862471, "learning_rate": 9.19678769349781e-06, "loss": 3.076, "step": 2331 }, { "epoch": 0.7948193592365371, "grad_norm": 32.018652840320236, "learning_rate": 9.195709402270967e-06, "loss": 2.8213, "step": 2332 }, { "epoch": 0.7951601908657123, "grad_norm": 12.105424253229685, "learning_rate": 9.194630451042331e-06, "loss": 3.1369, "step": 2333 }, { "epoch": 0.7955010224948875, "grad_norm": 15.309590307790572, "learning_rate": 9.19355083998163e-06, "loss": 3.6113, "step": 2334 }, { "epoch": 0.7958418541240627, "grad_norm": 18.470453927369803, "learning_rate": 9.192470569258684e-06, "loss": 3.7743, "step": 2335 }, { "epoch": 0.7961826857532379, "grad_norm": 34.254852433650235, "learning_rate": 9.191389639043432e-06, "loss": 3.6329, "step": 2336 }, { "epoch": 0.7965235173824131, "grad_norm": 15.13078254657666, "learning_rate": 9.190308049505905e-06, "loss": 3.0793, "step": 2337 }, { "epoch": 0.7968643490115883, "grad_norm": 16.778827493546654, "learning_rate": 9.18922580081624e-06, "loss": 3.257, "step": 2338 }, { "epoch": 0.7972051806407635, "grad_norm": 14.983664704644697, "learning_rate": 9.18814289314468e-06, "loss": 3.1499, "step": 2339 }, { "epoch": 0.7975460122699386, "grad_norm": 15.44211616455542, "learning_rate": 9.18705932666157e-06, "loss": 3.555, "step": 2340 }, { "epoch": 0.7978868438991138, "grad_norm": 14.747541869841813, "learning_rate": 9.185975101537361e-06, "loss": 3.5558, "step": 2341 }, { "epoch": 0.798227675528289, "grad_norm": 15.28420468166412, "learning_rate": 9.184890217942606e-06, "loss": 2.8295, "step": 2342 }, { "epoch": 0.7985685071574642, "grad_norm": 24.004605688689164, "learning_rate": 9.18380467604796e-06, "loss": 3.6598, "step": 2343 }, { "epoch": 0.7989093387866394, "grad_norm": 17.500257941226682, "learning_rate": 9.182718476024184e-06, "loss": 3.4082, "step": 2344 }, { "epoch": 0.7992501704158146, "grad_norm": 22.903309079374456, "learning_rate": 9.18163161804214e-06, "loss": 2.8414, "step": 2345 }, { "epoch": 0.7995910020449898, "grad_norm": 33.21018941675157, "learning_rate": 9.180544102272798e-06, "loss": 3.4036, "step": 2346 }, { "epoch": 0.799931833674165, "grad_norm": 20.71974394900947, "learning_rate": 9.179455928887226e-06, "loss": 3.6448, "step": 2347 }, { "epoch": 0.8002726653033402, "grad_norm": 40.499421515147944, "learning_rate": 9.1783670980566e-06, "loss": 3.4455, "step": 2348 }, { "epoch": 0.8006134969325154, "grad_norm": 14.967676188811344, "learning_rate": 9.177277609952196e-06, "loss": 2.917, "step": 2349 }, { "epoch": 0.8009543285616906, "grad_norm": 25.388501724176354, "learning_rate": 9.176187464745394e-06, "loss": 3.4399, "step": 2350 }, { "epoch": 0.8012951601908657, "grad_norm": 20.323614861389242, "learning_rate": 9.17509666260768e-06, "loss": 3.5136, "step": 2351 }, { "epoch": 0.8016359918200409, "grad_norm": 20.50972572332781, "learning_rate": 9.17400520371064e-06, "loss": 3.8936, "step": 2352 }, { "epoch": 0.8019768234492161, "grad_norm": 17.535716451912997, "learning_rate": 9.172913088225968e-06, "loss": 3.0149, "step": 2353 }, { "epoch": 0.8023176550783913, "grad_norm": 13.78351743644992, "learning_rate": 9.171820316325453e-06, "loss": 3.3189, "step": 2354 }, { "epoch": 0.8026584867075665, "grad_norm": 15.608262935134798, "learning_rate": 9.170726888180996e-06, "loss": 3.9005, "step": 2355 }, { "epoch": 0.8029993183367417, "grad_norm": 18.30825964047596, "learning_rate": 9.169632803964599e-06, "loss": 3.4273, "step": 2356 }, { "epoch": 0.8033401499659168, "grad_norm": 22.94558890042387, "learning_rate": 9.168538063848362e-06, "loss": 3.6191, "step": 2357 }, { "epoch": 0.803680981595092, "grad_norm": 14.26076532090428, "learning_rate": 9.167442668004493e-06, "loss": 2.7997, "step": 2358 }, { "epoch": 0.8040218132242672, "grad_norm": 22.582267832754667, "learning_rate": 9.166346616605304e-06, "loss": 3.5801, "step": 2359 }, { "epoch": 0.8043626448534424, "grad_norm": 19.14479656871101, "learning_rate": 9.165249909823206e-06, "loss": 3.2874, "step": 2360 }, { "epoch": 0.8047034764826176, "grad_norm": 18.825916450723792, "learning_rate": 9.164152547830715e-06, "loss": 3.5138, "step": 2361 }, { "epoch": 0.8050443081117927, "grad_norm": 23.888060596199878, "learning_rate": 9.163054530800455e-06, "loss": 3.4394, "step": 2362 }, { "epoch": 0.8053851397409679, "grad_norm": 16.73340068848952, "learning_rate": 9.161955858905142e-06, "loss": 3.8995, "step": 2363 }, { "epoch": 0.8057259713701431, "grad_norm": 22.950317693289552, "learning_rate": 9.160856532317607e-06, "loss": 3.6838, "step": 2364 }, { "epoch": 0.8060668029993183, "grad_norm": 19.803337146358142, "learning_rate": 9.159756551210774e-06, "loss": 3.4253, "step": 2365 }, { "epoch": 0.8064076346284935, "grad_norm": 17.603503635671508, "learning_rate": 9.158655915757678e-06, "loss": 3.0491, "step": 2366 }, { "epoch": 0.8067484662576687, "grad_norm": 22.762761041897896, "learning_rate": 9.15755462613145e-06, "loss": 3.1037, "step": 2367 }, { "epoch": 0.8070892978868439, "grad_norm": 16.939096282527, "learning_rate": 9.156452682505332e-06, "loss": 3.5573, "step": 2368 }, { "epoch": 0.8074301295160191, "grad_norm": 20.357862036535888, "learning_rate": 9.155350085052659e-06, "loss": 3.6476, "step": 2369 }, { "epoch": 0.8077709611451943, "grad_norm": 17.866436859867207, "learning_rate": 9.154246833946876e-06, "loss": 3.3504, "step": 2370 }, { "epoch": 0.8081117927743695, "grad_norm": 16.863999125204725, "learning_rate": 9.15314292936153e-06, "loss": 3.7055, "step": 2371 }, { "epoch": 0.8084526244035446, "grad_norm": 16.778370483826215, "learning_rate": 9.152038371470269e-06, "loss": 3.5891, "step": 2372 }, { "epoch": 0.8087934560327198, "grad_norm": 19.142149206087602, "learning_rate": 9.150933160446842e-06, "loss": 3.0667, "step": 2373 }, { "epoch": 0.809134287661895, "grad_norm": 13.203414553655334, "learning_rate": 9.149827296465107e-06, "loss": 3.4842, "step": 2374 }, { "epoch": 0.8094751192910702, "grad_norm": 12.894850604240622, "learning_rate": 9.148720779699016e-06, "loss": 3.145, "step": 2375 }, { "epoch": 0.8098159509202454, "grad_norm": 13.71550831433336, "learning_rate": 9.147613610322633e-06, "loss": 2.9144, "step": 2376 }, { "epoch": 0.8101567825494206, "grad_norm": 23.291364445482547, "learning_rate": 9.146505788510118e-06, "loss": 3.4701, "step": 2377 }, { "epoch": 0.8104976141785958, "grad_norm": 18.776741621628137, "learning_rate": 9.145397314435737e-06, "loss": 3.4881, "step": 2378 }, { "epoch": 0.810838445807771, "grad_norm": 18.634415765252427, "learning_rate": 9.144288188273856e-06, "loss": 2.9467, "step": 2379 }, { "epoch": 0.8111792774369462, "grad_norm": 17.09355088889809, "learning_rate": 9.143178410198945e-06, "loss": 3.1055, "step": 2380 }, { "epoch": 0.8115201090661214, "grad_norm": 13.722790094409966, "learning_rate": 9.142067980385579e-06, "loss": 2.8237, "step": 2381 }, { "epoch": 0.8118609406952966, "grad_norm": 27.042214737548687, "learning_rate": 9.140956899008428e-06, "loss": 3.2906, "step": 2382 }, { "epoch": 0.8122017723244717, "grad_norm": 17.997008144298885, "learning_rate": 9.139845166242274e-06, "loss": 2.8277, "step": 2383 }, { "epoch": 0.8125426039536469, "grad_norm": 13.97319413324819, "learning_rate": 9.138732782261997e-06, "loss": 3.6137, "step": 2384 }, { "epoch": 0.8128834355828221, "grad_norm": 18.020254213177534, "learning_rate": 9.137619747242577e-06, "loss": 3.3837, "step": 2385 }, { "epoch": 0.8132242672119973, "grad_norm": 13.357446076596586, "learning_rate": 9.136506061359098e-06, "loss": 3.1552, "step": 2386 }, { "epoch": 0.8135650988411725, "grad_norm": 15.06226979787106, "learning_rate": 9.135391724786751e-06, "loss": 3.5435, "step": 2387 }, { "epoch": 0.8139059304703476, "grad_norm": 26.298947481837843, "learning_rate": 9.134276737700824e-06, "loss": 2.4501, "step": 2388 }, { "epoch": 0.8142467620995228, "grad_norm": 21.213045751393793, "learning_rate": 9.133161100276708e-06, "loss": 3.0864, "step": 2389 }, { "epoch": 0.814587593728698, "grad_norm": 18.628118743982064, "learning_rate": 9.132044812689897e-06, "loss": 3.5066, "step": 2390 }, { "epoch": 0.8149284253578732, "grad_norm": 16.01425335415117, "learning_rate": 9.13092787511599e-06, "loss": 3.1766, "step": 2391 }, { "epoch": 0.8152692569870484, "grad_norm": 23.446313718431334, "learning_rate": 9.129810287730683e-06, "loss": 3.0271, "step": 2392 }, { "epoch": 0.8156100886162236, "grad_norm": 19.887136005355508, "learning_rate": 9.128692050709778e-06, "loss": 3.0478, "step": 2393 }, { "epoch": 0.8159509202453987, "grad_norm": 25.76636030563254, "learning_rate": 9.127573164229178e-06, "loss": 3.5309, "step": 2394 }, { "epoch": 0.8162917518745739, "grad_norm": 34.76967508042324, "learning_rate": 9.126453628464889e-06, "loss": 3.5035, "step": 2395 }, { "epoch": 0.8166325835037491, "grad_norm": 17.88164012731075, "learning_rate": 9.125333443593017e-06, "loss": 3.428, "step": 2396 }, { "epoch": 0.8169734151329243, "grad_norm": 16.423614585048547, "learning_rate": 9.12421260978977e-06, "loss": 2.7889, "step": 2397 }, { "epoch": 0.8173142467620995, "grad_norm": 12.427463435852337, "learning_rate": 9.123091127231465e-06, "loss": 3.0426, "step": 2398 }, { "epoch": 0.8176550783912747, "grad_norm": 22.3407799054145, "learning_rate": 9.121968996094513e-06, "loss": 3.5196, "step": 2399 }, { "epoch": 0.8179959100204499, "grad_norm": 15.373162885008144, "learning_rate": 9.120846216555428e-06, "loss": 3.486, "step": 2400 }, { "epoch": 0.8183367416496251, "grad_norm": 13.575897651562975, "learning_rate": 9.119722788790828e-06, "loss": 3.5517, "step": 2401 }, { "epoch": 0.8186775732788003, "grad_norm": 15.043518184401005, "learning_rate": 9.118598712977436e-06, "loss": 3.4028, "step": 2402 }, { "epoch": 0.8190184049079755, "grad_norm": 12.561947053777114, "learning_rate": 9.11747398929207e-06, "loss": 2.7213, "step": 2403 }, { "epoch": 0.8193592365371506, "grad_norm": 26.910260138370653, "learning_rate": 9.116348617911654e-06, "loss": 3.3318, "step": 2404 }, { "epoch": 0.8197000681663258, "grad_norm": 20.406348847036732, "learning_rate": 9.115222599013215e-06, "loss": 2.9833, "step": 2405 }, { "epoch": 0.820040899795501, "grad_norm": 20.077582629467837, "learning_rate": 9.11409593277388e-06, "loss": 3.6674, "step": 2406 }, { "epoch": 0.8203817314246762, "grad_norm": 20.5501928601293, "learning_rate": 9.112968619370877e-06, "loss": 3.3529, "step": 2407 }, { "epoch": 0.8207225630538514, "grad_norm": 15.496002874117803, "learning_rate": 9.111840658981537e-06, "loss": 3.1515, "step": 2408 }, { "epoch": 0.8210633946830266, "grad_norm": 46.059140692449326, "learning_rate": 9.110712051783293e-06, "loss": 3.3705, "step": 2409 }, { "epoch": 0.8214042263122018, "grad_norm": 13.272982148816922, "learning_rate": 9.10958279795368e-06, "loss": 2.8387, "step": 2410 }, { "epoch": 0.821745057941377, "grad_norm": 15.247866325710303, "learning_rate": 9.108452897670333e-06, "loss": 3.2503, "step": 2411 }, { "epoch": 0.8220858895705522, "grad_norm": 14.579323698663247, "learning_rate": 9.107322351110991e-06, "loss": 3.3064, "step": 2412 }, { "epoch": 0.8224267211997274, "grad_norm": 13.37530579257486, "learning_rate": 9.106191158453493e-06, "loss": 3.2966, "step": 2413 }, { "epoch": 0.8227675528289026, "grad_norm": 22.508503506103317, "learning_rate": 9.10505931987578e-06, "loss": 3.4239, "step": 2414 }, { "epoch": 0.8231083844580777, "grad_norm": 17.309053332200968, "learning_rate": 9.103926835555894e-06, "loss": 3.3768, "step": 2415 }, { "epoch": 0.8234492160872529, "grad_norm": 13.756427376603552, "learning_rate": 9.10279370567198e-06, "loss": 3.3378, "step": 2416 }, { "epoch": 0.823790047716428, "grad_norm": 25.795961643707024, "learning_rate": 9.101659930402284e-06, "loss": 2.4557, "step": 2417 }, { "epoch": 0.8241308793456033, "grad_norm": 20.225826048503585, "learning_rate": 9.100525509925154e-06, "loss": 3.4185, "step": 2418 }, { "epoch": 0.8244717109747784, "grad_norm": 16.17049597792254, "learning_rate": 9.099390444419038e-06, "loss": 2.7882, "step": 2419 }, { "epoch": 0.8248125426039536, "grad_norm": 19.218170968942456, "learning_rate": 9.098254734062486e-06, "loss": 2.9817, "step": 2420 }, { "epoch": 0.8251533742331288, "grad_norm": 17.045483323229746, "learning_rate": 9.09711837903415e-06, "loss": 3.2429, "step": 2421 }, { "epoch": 0.825494205862304, "grad_norm": 16.286686798040616, "learning_rate": 9.095981379512782e-06, "loss": 3.4329, "step": 2422 }, { "epoch": 0.8258350374914792, "grad_norm": 25.816461458544147, "learning_rate": 9.09484373567724e-06, "loss": 3.5333, "step": 2423 }, { "epoch": 0.8261758691206544, "grad_norm": 14.241033207995622, "learning_rate": 9.093705447706475e-06, "loss": 2.9512, "step": 2424 }, { "epoch": 0.8265167007498296, "grad_norm": 12.772043947182974, "learning_rate": 9.092566515779551e-06, "loss": 2.8783, "step": 2425 }, { "epoch": 0.8268575323790047, "grad_norm": 23.760269167717404, "learning_rate": 9.091426940075619e-06, "loss": 2.9539, "step": 2426 }, { "epoch": 0.8271983640081799, "grad_norm": 20.847602265662346, "learning_rate": 9.090286720773943e-06, "loss": 3.578, "step": 2427 }, { "epoch": 0.8275391956373551, "grad_norm": 24.873861100895716, "learning_rate": 9.089145858053885e-06, "loss": 3.3606, "step": 2428 }, { "epoch": 0.8278800272665303, "grad_norm": 12.696497252710875, "learning_rate": 9.088004352094904e-06, "loss": 3.1954, "step": 2429 }, { "epoch": 0.8282208588957055, "grad_norm": 15.355184871125521, "learning_rate": 9.086862203076565e-06, "loss": 2.8093, "step": 2430 }, { "epoch": 0.8285616905248807, "grad_norm": 19.41158390389094, "learning_rate": 9.085719411178533e-06, "loss": 3.4925, "step": 2431 }, { "epoch": 0.8289025221540559, "grad_norm": 17.812016141980532, "learning_rate": 9.084575976580574e-06, "loss": 3.1239, "step": 2432 }, { "epoch": 0.8292433537832311, "grad_norm": 18.1961037022868, "learning_rate": 9.083431899462554e-06, "loss": 3.2766, "step": 2433 }, { "epoch": 0.8295841854124063, "grad_norm": 20.94441451290299, "learning_rate": 9.08228718000444e-06, "loss": 2.9311, "step": 2434 }, { "epoch": 0.8299250170415815, "grad_norm": 13.459440113524185, "learning_rate": 9.081141818386302e-06, "loss": 3.1426, "step": 2435 }, { "epoch": 0.8302658486707567, "grad_norm": 20.99525134659867, "learning_rate": 9.079995814788311e-06, "loss": 3.0768, "step": 2436 }, { "epoch": 0.8306066802999318, "grad_norm": 16.907365971082864, "learning_rate": 9.078849169390736e-06, "loss": 3.114, "step": 2437 }, { "epoch": 0.830947511929107, "grad_norm": 15.335619913444427, "learning_rate": 9.077701882373949e-06, "loss": 3.4113, "step": 2438 }, { "epoch": 0.8312883435582822, "grad_norm": 19.82277632244142, "learning_rate": 9.076553953918426e-06, "loss": 2.9033, "step": 2439 }, { "epoch": 0.8316291751874574, "grad_norm": 18.6016498122997, "learning_rate": 9.075405384204736e-06, "loss": 2.9175, "step": 2440 }, { "epoch": 0.8319700068166326, "grad_norm": 14.73140702180242, "learning_rate": 9.074256173413555e-06, "loss": 3.2264, "step": 2441 }, { "epoch": 0.8323108384458078, "grad_norm": 25.396576792435877, "learning_rate": 9.073106321725662e-06, "loss": 3.9497, "step": 2442 }, { "epoch": 0.832651670074983, "grad_norm": 29.98033404228255, "learning_rate": 9.071955829321928e-06, "loss": 2.9884, "step": 2443 }, { "epoch": 0.8329925017041582, "grad_norm": 19.24445993020744, "learning_rate": 9.070804696383334e-06, "loss": 3.6053, "step": 2444 }, { "epoch": 0.8333333333333334, "grad_norm": 23.19408337857866, "learning_rate": 9.069652923090955e-06, "loss": 3.4011, "step": 2445 }, { "epoch": 0.8336741649625086, "grad_norm": 14.846727861619131, "learning_rate": 9.068500509625973e-06, "loss": 3.0189, "step": 2446 }, { "epoch": 0.8340149965916837, "grad_norm": 13.637859909094301, "learning_rate": 9.067347456169664e-06, "loss": 3.4156, "step": 2447 }, { "epoch": 0.8343558282208589, "grad_norm": 24.76251836849556, "learning_rate": 9.066193762903408e-06, "loss": 3.6889, "step": 2448 }, { "epoch": 0.834696659850034, "grad_norm": 10.799710767518677, "learning_rate": 9.065039430008689e-06, "loss": 2.9284, "step": 2449 }, { "epoch": 0.8350374914792092, "grad_norm": 19.117839046082647, "learning_rate": 9.063884457667083e-06, "loss": 3.5847, "step": 2450 }, { "epoch": 0.8353783231083844, "grad_norm": 20.512761502020748, "learning_rate": 9.062728846060276e-06, "loss": 3.854, "step": 2451 }, { "epoch": 0.8357191547375596, "grad_norm": 15.923363440071734, "learning_rate": 9.061572595370048e-06, "loss": 3.473, "step": 2452 }, { "epoch": 0.8360599863667348, "grad_norm": 19.425293891256654, "learning_rate": 9.060415705778282e-06, "loss": 3.6185, "step": 2453 }, { "epoch": 0.83640081799591, "grad_norm": 12.909506185386316, "learning_rate": 9.059258177466963e-06, "loss": 3.3467, "step": 2454 }, { "epoch": 0.8367416496250852, "grad_norm": 19.047711588685917, "learning_rate": 9.058100010618174e-06, "loss": 3.5846, "step": 2455 }, { "epoch": 0.8370824812542604, "grad_norm": 17.84880232164407, "learning_rate": 9.056941205414097e-06, "loss": 3.8702, "step": 2456 }, { "epoch": 0.8374233128834356, "grad_norm": 14.893759959810534, "learning_rate": 9.055781762037022e-06, "loss": 3.0417, "step": 2457 }, { "epoch": 0.8377641445126107, "grad_norm": 13.54748927652795, "learning_rate": 9.054621680669328e-06, "loss": 3.1686, "step": 2458 }, { "epoch": 0.8381049761417859, "grad_norm": 22.85508075538054, "learning_rate": 9.053460961493503e-06, "loss": 3.0726, "step": 2459 }, { "epoch": 0.8384458077709611, "grad_norm": 14.65925903924751, "learning_rate": 9.052299604692134e-06, "loss": 3.1139, "step": 2460 }, { "epoch": 0.8387866394001363, "grad_norm": 17.48110893045032, "learning_rate": 9.051137610447907e-06, "loss": 3.6512, "step": 2461 }, { "epoch": 0.8391274710293115, "grad_norm": 22.574932644727326, "learning_rate": 9.049974978943604e-06, "loss": 3.9933, "step": 2462 }, { "epoch": 0.8394683026584867, "grad_norm": 17.4262887398651, "learning_rate": 9.048811710362117e-06, "loss": 3.6163, "step": 2463 }, { "epoch": 0.8398091342876619, "grad_norm": 26.208599743132172, "learning_rate": 9.04764780488643e-06, "loss": 3.2557, "step": 2464 }, { "epoch": 0.8401499659168371, "grad_norm": 16.119061390475427, "learning_rate": 9.04648326269963e-06, "loss": 3.4775, "step": 2465 }, { "epoch": 0.8404907975460123, "grad_norm": 19.435518692497183, "learning_rate": 9.045318083984906e-06, "loss": 3.5049, "step": 2466 }, { "epoch": 0.8408316291751875, "grad_norm": 18.90957280580403, "learning_rate": 9.044152268925543e-06, "loss": 3.3982, "step": 2467 }, { "epoch": 0.8411724608043627, "grad_norm": 29.880578125610764, "learning_rate": 9.04298581770493e-06, "loss": 3.1759, "step": 2468 }, { "epoch": 0.8415132924335378, "grad_norm": 15.513053999997519, "learning_rate": 9.041818730506553e-06, "loss": 3.11, "step": 2469 }, { "epoch": 0.841854124062713, "grad_norm": 18.36905387435256, "learning_rate": 9.040651007514001e-06, "loss": 3.4063, "step": 2470 }, { "epoch": 0.8421949556918882, "grad_norm": 16.907538978448553, "learning_rate": 9.03948264891096e-06, "loss": 3.18, "step": 2471 }, { "epoch": 0.8425357873210634, "grad_norm": 13.4953091467825, "learning_rate": 9.038313654881218e-06, "loss": 3.4582, "step": 2472 }, { "epoch": 0.8428766189502386, "grad_norm": 16.874819246488784, "learning_rate": 9.037144025608661e-06, "loss": 3.5823, "step": 2473 }, { "epoch": 0.8432174505794138, "grad_norm": 16.821846157628837, "learning_rate": 9.035973761277281e-06, "loss": 3.2487, "step": 2474 }, { "epoch": 0.843558282208589, "grad_norm": 13.140188865934732, "learning_rate": 9.034802862071159e-06, "loss": 3.147, "step": 2475 }, { "epoch": 0.8438991138377642, "grad_norm": 14.556898203645924, "learning_rate": 9.033631328174488e-06, "loss": 3.5957, "step": 2476 }, { "epoch": 0.8442399454669394, "grad_norm": 20.063303111708247, "learning_rate": 9.03245915977155e-06, "loss": 2.8996, "step": 2477 }, { "epoch": 0.8445807770961146, "grad_norm": 18.330753179077945, "learning_rate": 9.031286357046737e-06, "loss": 4.0325, "step": 2478 }, { "epoch": 0.8449216087252897, "grad_norm": 19.045012816527162, "learning_rate": 9.030112920184528e-06, "loss": 3.4495, "step": 2479 }, { "epoch": 0.8452624403544649, "grad_norm": 22.706467739233304, "learning_rate": 9.028938849369516e-06, "loss": 3.6744, "step": 2480 }, { "epoch": 0.84560327198364, "grad_norm": 24.975215971900436, "learning_rate": 9.027764144786384e-06, "loss": 3.3368, "step": 2481 }, { "epoch": 0.8459441036128152, "grad_norm": 24.77526581804242, "learning_rate": 9.02658880661992e-06, "loss": 4.0264, "step": 2482 }, { "epoch": 0.8462849352419904, "grad_norm": 21.3977822655649, "learning_rate": 9.025412835055005e-06, "loss": 3.2643, "step": 2483 }, { "epoch": 0.8466257668711656, "grad_norm": 21.232365099631405, "learning_rate": 9.02423623027663e-06, "loss": 3.1365, "step": 2484 }, { "epoch": 0.8469665985003408, "grad_norm": 13.079303062069254, "learning_rate": 9.023058992469875e-06, "loss": 3.1411, "step": 2485 }, { "epoch": 0.847307430129516, "grad_norm": 15.993365839076109, "learning_rate": 9.021881121819925e-06, "loss": 3.1909, "step": 2486 }, { "epoch": 0.8476482617586912, "grad_norm": 21.11432695815161, "learning_rate": 9.020702618512064e-06, "loss": 4.1149, "step": 2487 }, { "epoch": 0.8479890933878664, "grad_norm": 17.577805747600387, "learning_rate": 9.019523482731678e-06, "loss": 3.7953, "step": 2488 }, { "epoch": 0.8483299250170416, "grad_norm": 19.426164864440853, "learning_rate": 9.018343714664245e-06, "loss": 3.4335, "step": 2489 }, { "epoch": 0.8486707566462167, "grad_norm": 23.54572524456234, "learning_rate": 9.01716331449535e-06, "loss": 2.9619, "step": 2490 }, { "epoch": 0.8490115882753919, "grad_norm": 17.943598071111115, "learning_rate": 9.015982282410675e-06, "loss": 3.6034, "step": 2491 }, { "epoch": 0.8493524199045671, "grad_norm": 19.280876985047392, "learning_rate": 9.014800618596e-06, "loss": 3.2417, "step": 2492 }, { "epoch": 0.8496932515337423, "grad_norm": 13.679680002212928, "learning_rate": 9.013618323237206e-06, "loss": 3.3964, "step": 2493 }, { "epoch": 0.8500340831629175, "grad_norm": 15.412282125205346, "learning_rate": 9.012435396520271e-06, "loss": 3.2454, "step": 2494 }, { "epoch": 0.8503749147920927, "grad_norm": 13.54520234144716, "learning_rate": 9.011251838631277e-06, "loss": 2.7674, "step": 2495 }, { "epoch": 0.8507157464212679, "grad_norm": 18.245441284771918, "learning_rate": 9.0100676497564e-06, "loss": 2.9394, "step": 2496 }, { "epoch": 0.8510565780504431, "grad_norm": 26.805163147213598, "learning_rate": 9.008882830081919e-06, "loss": 3.3999, "step": 2497 }, { "epoch": 0.8513974096796183, "grad_norm": 15.935504749274058, "learning_rate": 9.007697379794212e-06, "loss": 3.3175, "step": 2498 }, { "epoch": 0.8517382413087935, "grad_norm": 25.039339599793102, "learning_rate": 9.006511299079752e-06, "loss": 3.3837, "step": 2499 }, { "epoch": 0.8520790729379687, "grad_norm": 17.175925734247897, "learning_rate": 9.005324588125115e-06, "loss": 3.3645, "step": 2500 }, { "epoch": 0.8524199045671438, "grad_norm": 16.20280138368815, "learning_rate": 9.004137247116977e-06, "loss": 3.2318, "step": 2501 }, { "epoch": 0.852760736196319, "grad_norm": 16.67129545535022, "learning_rate": 9.00294927624211e-06, "loss": 3.4075, "step": 2502 }, { "epoch": 0.8531015678254942, "grad_norm": 18.902281290478683, "learning_rate": 9.001760675687387e-06, "loss": 3.1946, "step": 2503 }, { "epoch": 0.8534423994546694, "grad_norm": 18.585585563316393, "learning_rate": 9.000571445639779e-06, "loss": 2.9643, "step": 2504 }, { "epoch": 0.8537832310838446, "grad_norm": 17.959435765312417, "learning_rate": 8.999381586286359e-06, "loss": 3.1123, "step": 2505 }, { "epoch": 0.8541240627130198, "grad_norm": 13.102107563255688, "learning_rate": 8.998191097814292e-06, "loss": 3.2032, "step": 2506 }, { "epoch": 0.854464894342195, "grad_norm": 18.91979014082419, "learning_rate": 8.996999980410852e-06, "loss": 3.625, "step": 2507 }, { "epoch": 0.8548057259713702, "grad_norm": 14.285052271847208, "learning_rate": 8.995808234263404e-06, "loss": 3.155, "step": 2508 }, { "epoch": 0.8551465576005454, "grad_norm": 14.643966573414696, "learning_rate": 8.994615859559412e-06, "loss": 3.427, "step": 2509 }, { "epoch": 0.8554873892297206, "grad_norm": 13.061251265095386, "learning_rate": 8.993422856486446e-06, "loss": 3.1418, "step": 2510 }, { "epoch": 0.8558282208588958, "grad_norm": 17.155698451420008, "learning_rate": 8.992229225232167e-06, "loss": 3.3005, "step": 2511 }, { "epoch": 0.8561690524880708, "grad_norm": 19.064644379240814, "learning_rate": 8.99103496598434e-06, "loss": 3.8094, "step": 2512 }, { "epoch": 0.856509884117246, "grad_norm": 14.751398462071323, "learning_rate": 8.989840078930822e-06, "loss": 3.4364, "step": 2513 }, { "epoch": 0.8568507157464212, "grad_norm": 16.08250782149595, "learning_rate": 8.988644564259581e-06, "loss": 3.1978, "step": 2514 }, { "epoch": 0.8571915473755964, "grad_norm": 21.42620765697391, "learning_rate": 8.98744842215867e-06, "loss": 3.5852, "step": 2515 }, { "epoch": 0.8575323790047716, "grad_norm": 12.933862433069027, "learning_rate": 8.98625165281625e-06, "loss": 2.8531, "step": 2516 }, { "epoch": 0.8578732106339468, "grad_norm": 20.844742727043727, "learning_rate": 8.985054256420575e-06, "loss": 3.908, "step": 2517 }, { "epoch": 0.858214042263122, "grad_norm": 12.937282955917563, "learning_rate": 8.983856233160003e-06, "loss": 3.1311, "step": 2518 }, { "epoch": 0.8585548738922972, "grad_norm": 18.44922679132292, "learning_rate": 8.982657583222986e-06, "loss": 3.4946, "step": 2519 }, { "epoch": 0.8588957055214724, "grad_norm": 18.977044500937282, "learning_rate": 8.981458306798078e-06, "loss": 3.8432, "step": 2520 }, { "epoch": 0.8592365371506476, "grad_norm": 13.996163129910174, "learning_rate": 8.98025840407393e-06, "loss": 3.1883, "step": 2521 }, { "epoch": 0.8595773687798227, "grad_norm": 20.27690789780118, "learning_rate": 8.97905787523929e-06, "loss": 3.2158, "step": 2522 }, { "epoch": 0.8599182004089979, "grad_norm": 13.099504452489223, "learning_rate": 8.977856720483005e-06, "loss": 2.9615, "step": 2523 }, { "epoch": 0.8602590320381731, "grad_norm": 17.173977075701263, "learning_rate": 8.976654939994023e-06, "loss": 3.3923, "step": 2524 }, { "epoch": 0.8605998636673483, "grad_norm": 16.589256787511843, "learning_rate": 8.97545253396139e-06, "loss": 3.4206, "step": 2525 }, { "epoch": 0.8609406952965235, "grad_norm": 26.911864285849596, "learning_rate": 8.974249502574248e-06, "loss": 2.8757, "step": 2526 }, { "epoch": 0.8612815269256987, "grad_norm": 13.76769376218054, "learning_rate": 8.973045846021837e-06, "loss": 3.459, "step": 2527 }, { "epoch": 0.8616223585548739, "grad_norm": 17.32405517640472, "learning_rate": 8.971841564493502e-06, "loss": 3.6824, "step": 2528 }, { "epoch": 0.8619631901840491, "grad_norm": 12.6890369430944, "learning_rate": 8.970636658178675e-06, "loss": 3.2362, "step": 2529 }, { "epoch": 0.8623040218132243, "grad_norm": 16.261626020665148, "learning_rate": 8.969431127266896e-06, "loss": 3.6278, "step": 2530 }, { "epoch": 0.8626448534423995, "grad_norm": 22.831061455606793, "learning_rate": 8.9682249719478e-06, "loss": 3.6986, "step": 2531 }, { "epoch": 0.8629856850715747, "grad_norm": 21.416361626774027, "learning_rate": 8.967018192411118e-06, "loss": 3.4809, "step": 2532 }, { "epoch": 0.8633265167007498, "grad_norm": 15.244843351145281, "learning_rate": 8.965810788846685e-06, "loss": 3.1427, "step": 2533 }, { "epoch": 0.863667348329925, "grad_norm": 17.64426522358967, "learning_rate": 8.964602761444424e-06, "loss": 3.5285, "step": 2534 }, { "epoch": 0.8640081799591002, "grad_norm": 18.509463248903714, "learning_rate": 8.963394110394371e-06, "loss": 3.0782, "step": 2535 }, { "epoch": 0.8643490115882754, "grad_norm": 17.740652728035393, "learning_rate": 8.962184835886644e-06, "loss": 3.1384, "step": 2536 }, { "epoch": 0.8646898432174506, "grad_norm": 18.261553423135222, "learning_rate": 8.960974938111471e-06, "loss": 3.544, "step": 2537 }, { "epoch": 0.8650306748466258, "grad_norm": 19.23172705867055, "learning_rate": 8.959764417259172e-06, "loss": 3.6919, "step": 2538 }, { "epoch": 0.865371506475801, "grad_norm": 16.573000635196315, "learning_rate": 8.958553273520168e-06, "loss": 3.0245, "step": 2539 }, { "epoch": 0.8657123381049762, "grad_norm": 15.598739860282748, "learning_rate": 8.957341507084975e-06, "loss": 3.016, "step": 2540 }, { "epoch": 0.8660531697341514, "grad_norm": 23.669345053206843, "learning_rate": 8.956129118144211e-06, "loss": 3.9527, "step": 2541 }, { "epoch": 0.8663940013633266, "grad_norm": 16.45766325185923, "learning_rate": 8.954916106888587e-06, "loss": 3.3011, "step": 2542 }, { "epoch": 0.8667348329925018, "grad_norm": 13.616589793924241, "learning_rate": 8.953702473508916e-06, "loss": 3.5519, "step": 2543 }, { "epoch": 0.8670756646216768, "grad_norm": 10.94836284850705, "learning_rate": 8.952488218196107e-06, "loss": 3.3588, "step": 2544 }, { "epoch": 0.867416496250852, "grad_norm": 16.15433096412706, "learning_rate": 8.951273341141169e-06, "loss": 3.2061, "step": 2545 }, { "epoch": 0.8677573278800272, "grad_norm": 17.644814216403923, "learning_rate": 8.950057842535204e-06, "loss": 3.1495, "step": 2546 }, { "epoch": 0.8680981595092024, "grad_norm": 14.956051300410229, "learning_rate": 8.948841722569416e-06, "loss": 2.8823, "step": 2547 }, { "epoch": 0.8684389911383776, "grad_norm": 12.032389184245892, "learning_rate": 8.947624981435105e-06, "loss": 3.1144, "step": 2548 }, { "epoch": 0.8687798227675528, "grad_norm": 17.907122893700418, "learning_rate": 8.946407619323671e-06, "loss": 3.1172, "step": 2549 }, { "epoch": 0.869120654396728, "grad_norm": 14.804162630475096, "learning_rate": 8.945189636426608e-06, "loss": 3.2271, "step": 2550 }, { "epoch": 0.8694614860259032, "grad_norm": 15.914750924454655, "learning_rate": 8.94397103293551e-06, "loss": 3.2645, "step": 2551 }, { "epoch": 0.8698023176550784, "grad_norm": 18.380186634115482, "learning_rate": 8.94275180904207e-06, "loss": 2.9581, "step": 2552 }, { "epoch": 0.8701431492842536, "grad_norm": 25.636631508478025, "learning_rate": 8.941531964938076e-06, "loss": 3.7498, "step": 2553 }, { "epoch": 0.8704839809134287, "grad_norm": 19.45822524927454, "learning_rate": 8.940311500815411e-06, "loss": 3.4741, "step": 2554 }, { "epoch": 0.8708248125426039, "grad_norm": 26.137589895865297, "learning_rate": 8.939090416866062e-06, "loss": 3.4975, "step": 2555 }, { "epoch": 0.8711656441717791, "grad_norm": 15.744131524949397, "learning_rate": 8.93786871328211e-06, "loss": 3.1802, "step": 2556 }, { "epoch": 0.8715064758009543, "grad_norm": 15.206287328748104, "learning_rate": 8.936646390255734e-06, "loss": 3.3128, "step": 2557 }, { "epoch": 0.8718473074301295, "grad_norm": 16.97659502609929, "learning_rate": 8.93542344797921e-06, "loss": 3.3851, "step": 2558 }, { "epoch": 0.8721881390593047, "grad_norm": 13.400484151900626, "learning_rate": 8.934199886644911e-06, "loss": 3.0789, "step": 2559 }, { "epoch": 0.8725289706884799, "grad_norm": 42.66915334720432, "learning_rate": 8.93297570644531e-06, "loss": 3.5284, "step": 2560 }, { "epoch": 0.8728698023176551, "grad_norm": 15.135592834450856, "learning_rate": 8.931750907572972e-06, "loss": 3.3116, "step": 2561 }, { "epoch": 0.8732106339468303, "grad_norm": 18.72139133061072, "learning_rate": 8.930525490220566e-06, "loss": 3.1384, "step": 2562 }, { "epoch": 0.8735514655760055, "grad_norm": 14.627331797921567, "learning_rate": 8.929299454580855e-06, "loss": 3.1913, "step": 2563 }, { "epoch": 0.8738922972051807, "grad_norm": 14.619976276925701, "learning_rate": 8.928072800846697e-06, "loss": 3.4051, "step": 2564 }, { "epoch": 0.8742331288343558, "grad_norm": 16.836077326456703, "learning_rate": 8.92684552921105e-06, "loss": 3.2225, "step": 2565 }, { "epoch": 0.874573960463531, "grad_norm": 22.970586464106376, "learning_rate": 8.92561763986697e-06, "loss": 3.4569, "step": 2566 }, { "epoch": 0.8749147920927062, "grad_norm": 11.549589888000659, "learning_rate": 8.924389133007609e-06, "loss": 3.2314, "step": 2567 }, { "epoch": 0.8752556237218814, "grad_norm": 12.639525829463885, "learning_rate": 8.923160008826216e-06, "loss": 3.0867, "step": 2568 }, { "epoch": 0.8755964553510566, "grad_norm": 35.08922376351978, "learning_rate": 8.921930267516133e-06, "loss": 3.899, "step": 2569 }, { "epoch": 0.8759372869802318, "grad_norm": 16.791122756801638, "learning_rate": 8.920699909270809e-06, "loss": 3.533, "step": 2570 }, { "epoch": 0.876278118609407, "grad_norm": 11.905418741636423, "learning_rate": 8.91946893428378e-06, "loss": 3.1115, "step": 2571 }, { "epoch": 0.8766189502385822, "grad_norm": 16.963960207749334, "learning_rate": 8.918237342748687e-06, "loss": 2.9679, "step": 2572 }, { "epoch": 0.8769597818677574, "grad_norm": 23.23187854404608, "learning_rate": 8.917005134859263e-06, "loss": 3.8656, "step": 2573 }, { "epoch": 0.8773006134969326, "grad_norm": 29.571167720906537, "learning_rate": 8.915772310809338e-06, "loss": 3.6199, "step": 2574 }, { "epoch": 0.8776414451261078, "grad_norm": 19.864558980311482, "learning_rate": 8.914538870792841e-06, "loss": 3.6041, "step": 2575 }, { "epoch": 0.8779822767552828, "grad_norm": 32.40597445636004, "learning_rate": 8.913304815003796e-06, "loss": 4.233, "step": 2576 }, { "epoch": 0.878323108384458, "grad_norm": 16.756252922968606, "learning_rate": 8.912070143636325e-06, "loss": 2.9932, "step": 2577 }, { "epoch": 0.8786639400136332, "grad_norm": 16.444427012291722, "learning_rate": 8.910834856884648e-06, "loss": 3.0674, "step": 2578 }, { "epoch": 0.8790047716428084, "grad_norm": 20.24359367469236, "learning_rate": 8.909598954943081e-06, "loss": 3.2323, "step": 2579 }, { "epoch": 0.8793456032719836, "grad_norm": 16.84198541480077, "learning_rate": 8.908362438006033e-06, "loss": 3.3313, "step": 2580 }, { "epoch": 0.8796864349011588, "grad_norm": 18.65773262037242, "learning_rate": 8.907125306268016e-06, "loss": 3.8243, "step": 2581 }, { "epoch": 0.880027266530334, "grad_norm": 13.605233998524524, "learning_rate": 8.905887559923635e-06, "loss": 3.0313, "step": 2582 }, { "epoch": 0.8803680981595092, "grad_norm": 21.57099224760928, "learning_rate": 8.904649199167594e-06, "loss": 3.5689, "step": 2583 }, { "epoch": 0.8807089297886844, "grad_norm": 17.094000848151907, "learning_rate": 8.903410224194689e-06, "loss": 3.4659, "step": 2584 }, { "epoch": 0.8810497614178596, "grad_norm": 15.82122564627449, "learning_rate": 8.902170635199818e-06, "loss": 2.8154, "step": 2585 }, { "epoch": 0.8813905930470347, "grad_norm": 18.012259037032894, "learning_rate": 8.900930432377972e-06, "loss": 2.9192, "step": 2586 }, { "epoch": 0.8817314246762099, "grad_norm": 17.303825443668035, "learning_rate": 8.899689615924242e-06, "loss": 3.0612, "step": 2587 }, { "epoch": 0.8820722563053851, "grad_norm": 15.522802778342063, "learning_rate": 8.898448186033808e-06, "loss": 2.5114, "step": 2588 }, { "epoch": 0.8824130879345603, "grad_norm": 34.81177648216177, "learning_rate": 8.897206142901958e-06, "loss": 3.3171, "step": 2589 }, { "epoch": 0.8827539195637355, "grad_norm": 14.443844361280465, "learning_rate": 8.895963486724066e-06, "loss": 3.016, "step": 2590 }, { "epoch": 0.8830947511929107, "grad_norm": 16.608527475103152, "learning_rate": 8.894720217695609e-06, "loss": 3.2254, "step": 2591 }, { "epoch": 0.8834355828220859, "grad_norm": 18.07300450932185, "learning_rate": 8.89347633601216e-06, "loss": 3.314, "step": 2592 }, { "epoch": 0.8837764144512611, "grad_norm": 17.99991018618848, "learning_rate": 8.892231841869383e-06, "loss": 3.2984, "step": 2593 }, { "epoch": 0.8841172460804363, "grad_norm": 23.68684902248102, "learning_rate": 8.890986735463042e-06, "loss": 3.2368, "step": 2594 }, { "epoch": 0.8844580777096115, "grad_norm": 12.377640533811284, "learning_rate": 8.889741016989e-06, "loss": 2.7937, "step": 2595 }, { "epoch": 0.8847989093387867, "grad_norm": 18.93954919053635, "learning_rate": 8.88849468664321e-06, "loss": 3.2709, "step": 2596 }, { "epoch": 0.8851397409679618, "grad_norm": 20.97760510726073, "learning_rate": 8.887247744621728e-06, "loss": 3.4956, "step": 2597 }, { "epoch": 0.885480572597137, "grad_norm": 10.243451637219906, "learning_rate": 8.886000191120702e-06, "loss": 3.1416, "step": 2598 }, { "epoch": 0.8858214042263122, "grad_norm": 17.135549004785315, "learning_rate": 8.884752026336376e-06, "loss": 3.8823, "step": 2599 }, { "epoch": 0.8861622358554874, "grad_norm": 13.179906951645833, "learning_rate": 8.883503250465092e-06, "loss": 3.063, "step": 2600 }, { "epoch": 0.8865030674846626, "grad_norm": 13.801727936293917, "learning_rate": 8.882253863703287e-06, "loss": 3.1578, "step": 2601 }, { "epoch": 0.8868438991138378, "grad_norm": 24.02401969243966, "learning_rate": 8.881003866247495e-06, "loss": 3.0646, "step": 2602 }, { "epoch": 0.887184730743013, "grad_norm": 14.187545116786932, "learning_rate": 8.879753258294346e-06, "loss": 2.9487, "step": 2603 }, { "epoch": 0.8875255623721882, "grad_norm": 14.544883275338014, "learning_rate": 8.878502040040564e-06, "loss": 3.3355, "step": 2604 }, { "epoch": 0.8878663940013634, "grad_norm": 21.339704329949885, "learning_rate": 8.877250211682973e-06, "loss": 3.3834, "step": 2605 }, { "epoch": 0.8882072256305386, "grad_norm": 15.434572608069374, "learning_rate": 8.875997773418489e-06, "loss": 3.2591, "step": 2606 }, { "epoch": 0.8885480572597138, "grad_norm": 19.429338636395887, "learning_rate": 8.874744725444124e-06, "loss": 3.2709, "step": 2607 }, { "epoch": 0.8888888888888888, "grad_norm": 17.940726478218682, "learning_rate": 8.873491067956992e-06, "loss": 3.8305, "step": 2608 }, { "epoch": 0.889229720518064, "grad_norm": 15.15179529357824, "learning_rate": 8.872236801154295e-06, "loss": 3.0941, "step": 2609 }, { "epoch": 0.8895705521472392, "grad_norm": 11.341647373505868, "learning_rate": 8.870981925233333e-06, "loss": 2.9114, "step": 2610 }, { "epoch": 0.8899113837764144, "grad_norm": 19.85778552703168, "learning_rate": 8.869726440391506e-06, "loss": 3.5337, "step": 2611 }, { "epoch": 0.8902522154055896, "grad_norm": 19.261066530346714, "learning_rate": 8.868470346826305e-06, "loss": 3.0625, "step": 2612 }, { "epoch": 0.8905930470347648, "grad_norm": 14.48092293300439, "learning_rate": 8.867213644735319e-06, "loss": 3.5505, "step": 2613 }, { "epoch": 0.89093387866394, "grad_norm": 17.052113760321582, "learning_rate": 8.865956334316233e-06, "loss": 2.8712, "step": 2614 }, { "epoch": 0.8912747102931152, "grad_norm": 17.143128505549743, "learning_rate": 8.864698415766825e-06, "loss": 3.5641, "step": 2615 }, { "epoch": 0.8916155419222904, "grad_norm": 14.717468469098675, "learning_rate": 8.86343988928497e-06, "loss": 3.0236, "step": 2616 }, { "epoch": 0.8919563735514656, "grad_norm": 13.978465000620862, "learning_rate": 8.862180755068644e-06, "loss": 2.9009, "step": 2617 }, { "epoch": 0.8922972051806408, "grad_norm": 14.779266877690182, "learning_rate": 8.860921013315911e-06, "loss": 3.4647, "step": 2618 }, { "epoch": 0.8926380368098159, "grad_norm": 16.323610915739796, "learning_rate": 8.859660664224931e-06, "loss": 3.069, "step": 2619 }, { "epoch": 0.8929788684389911, "grad_norm": 30.01579058630744, "learning_rate": 8.858399707993965e-06, "loss": 3.3625, "step": 2620 }, { "epoch": 0.8933197000681663, "grad_norm": 12.51917313153003, "learning_rate": 8.857138144821366e-06, "loss": 2.6183, "step": 2621 }, { "epoch": 0.8936605316973415, "grad_norm": 18.223577426403516, "learning_rate": 8.85587597490558e-06, "loss": 2.6415, "step": 2622 }, { "epoch": 0.8940013633265167, "grad_norm": 20.171186724358783, "learning_rate": 8.854613198445156e-06, "loss": 3.1098, "step": 2623 }, { "epoch": 0.8943421949556919, "grad_norm": 30.65630707879049, "learning_rate": 8.853349815638733e-06, "loss": 3.1062, "step": 2624 }, { "epoch": 0.8946830265848671, "grad_norm": 22.752388972794545, "learning_rate": 8.852085826685043e-06, "loss": 3.1068, "step": 2625 }, { "epoch": 0.8950238582140423, "grad_norm": 20.93027634972307, "learning_rate": 8.850821231782918e-06, "loss": 2.9937, "step": 2626 }, { "epoch": 0.8953646898432175, "grad_norm": 22.977787477439108, "learning_rate": 8.849556031131285e-06, "loss": 3.6026, "step": 2627 }, { "epoch": 0.8957055214723927, "grad_norm": 17.35702790109682, "learning_rate": 8.848290224929163e-06, "loss": 2.8426, "step": 2628 }, { "epoch": 0.8960463531015678, "grad_norm": 31.52000216206936, "learning_rate": 8.84702381337567e-06, "loss": 3.3156, "step": 2629 }, { "epoch": 0.896387184730743, "grad_norm": 14.874042831185387, "learning_rate": 8.845756796670017e-06, "loss": 3.6809, "step": 2630 }, { "epoch": 0.8967280163599182, "grad_norm": 15.418573738933045, "learning_rate": 8.84448917501151e-06, "loss": 3.1221, "step": 2631 }, { "epoch": 0.8970688479890934, "grad_norm": 18.949302839500017, "learning_rate": 8.843220948599553e-06, "loss": 3.3833, "step": 2632 }, { "epoch": 0.8974096796182686, "grad_norm": 16.447862832899936, "learning_rate": 8.841952117633642e-06, "loss": 3.2716, "step": 2633 }, { "epoch": 0.8977505112474438, "grad_norm": 22.129351682372214, "learning_rate": 8.840682682313368e-06, "loss": 3.3612, "step": 2634 }, { "epoch": 0.898091342876619, "grad_norm": 21.19170689422571, "learning_rate": 8.839412642838418e-06, "loss": 3.6422, "step": 2635 }, { "epoch": 0.8984321745057942, "grad_norm": 17.49137166910942, "learning_rate": 8.838141999408577e-06, "loss": 3.6466, "step": 2636 }, { "epoch": 0.8987730061349694, "grad_norm": 20.8124007650098, "learning_rate": 8.83687075222372e-06, "loss": 2.9688, "step": 2637 }, { "epoch": 0.8991138377641446, "grad_norm": 26.594509608147657, "learning_rate": 8.835598901483822e-06, "loss": 4.4425, "step": 2638 }, { "epoch": 0.8994546693933198, "grad_norm": 17.26414529764415, "learning_rate": 8.834326447388946e-06, "loss": 3.2178, "step": 2639 }, { "epoch": 0.8997955010224948, "grad_norm": 17.110122102804514, "learning_rate": 8.833053390139257e-06, "loss": 3.6444, "step": 2640 }, { "epoch": 0.90013633265167, "grad_norm": 16.658017762330513, "learning_rate": 8.831779729935011e-06, "loss": 3.2089, "step": 2641 }, { "epoch": 0.9004771642808452, "grad_norm": 17.608557432701666, "learning_rate": 8.830505466976562e-06, "loss": 3.4995, "step": 2642 }, { "epoch": 0.9008179959100204, "grad_norm": 13.384039141678082, "learning_rate": 8.82923060146435e-06, "loss": 2.4669, "step": 2643 }, { "epoch": 0.9011588275391956, "grad_norm": 26.086203816286904, "learning_rate": 8.827955133598925e-06, "loss": 3.2553, "step": 2644 }, { "epoch": 0.9014996591683708, "grad_norm": 13.971221993817517, "learning_rate": 8.826679063580918e-06, "loss": 3.2402, "step": 2645 }, { "epoch": 0.901840490797546, "grad_norm": 16.752079543640747, "learning_rate": 8.825402391611062e-06, "loss": 3.0951, "step": 2646 }, { "epoch": 0.9021813224267212, "grad_norm": 17.22384166909549, "learning_rate": 8.824125117890182e-06, "loss": 3.0001, "step": 2647 }, { "epoch": 0.9025221540558964, "grad_norm": 17.316426255556912, "learning_rate": 8.822847242619198e-06, "loss": 3.2489, "step": 2648 }, { "epoch": 0.9028629856850716, "grad_norm": 18.68017830199737, "learning_rate": 8.821568765999124e-06, "loss": 2.8603, "step": 2649 }, { "epoch": 0.9032038173142468, "grad_norm": 20.210483931266573, "learning_rate": 8.820289688231069e-06, "loss": 3.6506, "step": 2650 }, { "epoch": 0.9035446489434219, "grad_norm": 21.567645590345684, "learning_rate": 8.81901000951624e-06, "loss": 3.3283, "step": 2651 }, { "epoch": 0.9038854805725971, "grad_norm": 16.419713208746742, "learning_rate": 8.817729730055935e-06, "loss": 3.2494, "step": 2652 }, { "epoch": 0.9042263122017723, "grad_norm": 14.687542786488185, "learning_rate": 8.816448850051544e-06, "loss": 3.5863, "step": 2653 }, { "epoch": 0.9045671438309475, "grad_norm": 14.057386467627571, "learning_rate": 8.815167369704557e-06, "loss": 2.8908, "step": 2654 }, { "epoch": 0.9049079754601227, "grad_norm": 14.726088569920323, "learning_rate": 8.813885289216555e-06, "loss": 2.9723, "step": 2655 }, { "epoch": 0.9052488070892979, "grad_norm": 14.117743389014294, "learning_rate": 8.812602608789216e-06, "loss": 2.9577, "step": 2656 }, { "epoch": 0.9055896387184731, "grad_norm": 17.904001631391075, "learning_rate": 8.811319328624308e-06, "loss": 3.6025, "step": 2657 }, { "epoch": 0.9059304703476483, "grad_norm": 15.645893785376682, "learning_rate": 8.810035448923699e-06, "loss": 3.475, "step": 2658 }, { "epoch": 0.9062713019768235, "grad_norm": 15.99775212059134, "learning_rate": 8.808750969889346e-06, "loss": 3.5906, "step": 2659 }, { "epoch": 0.9066121336059987, "grad_norm": 11.943592456920813, "learning_rate": 8.807465891723306e-06, "loss": 2.7103, "step": 2660 }, { "epoch": 0.9069529652351738, "grad_norm": 13.171650670181881, "learning_rate": 8.806180214627723e-06, "loss": 2.9712, "step": 2661 }, { "epoch": 0.907293796864349, "grad_norm": 14.68175101835619, "learning_rate": 8.804893938804839e-06, "loss": 2.9511, "step": 2662 }, { "epoch": 0.9076346284935242, "grad_norm": 20.49888645783011, "learning_rate": 8.803607064456994e-06, "loss": 3.5643, "step": 2663 }, { "epoch": 0.9079754601226994, "grad_norm": 13.692582967666993, "learning_rate": 8.802319591786617e-06, "loss": 2.4134, "step": 2664 }, { "epoch": 0.9083162917518746, "grad_norm": 15.720765656577715, "learning_rate": 8.801031520996231e-06, "loss": 3.4724, "step": 2665 }, { "epoch": 0.9086571233810498, "grad_norm": 22.311878984212996, "learning_rate": 8.799742852288456e-06, "loss": 3.5045, "step": 2666 }, { "epoch": 0.908997955010225, "grad_norm": 20.103471092021575, "learning_rate": 8.798453585866003e-06, "loss": 2.5639, "step": 2667 }, { "epoch": 0.9093387866394002, "grad_norm": 18.274332855727753, "learning_rate": 8.797163721931681e-06, "loss": 3.5239, "step": 2668 }, { "epoch": 0.9096796182685754, "grad_norm": 18.27311623503724, "learning_rate": 8.79587326068839e-06, "loss": 3.7754, "step": 2669 }, { "epoch": 0.9100204498977505, "grad_norm": 32.16892554943389, "learning_rate": 8.794582202339127e-06, "loss": 3.3482, "step": 2670 }, { "epoch": 0.9103612815269257, "grad_norm": 18.19777500467198, "learning_rate": 8.793290547086975e-06, "loss": 3.3267, "step": 2671 }, { "epoch": 0.9107021131561008, "grad_norm": 12.965224396513731, "learning_rate": 8.791998295135121e-06, "loss": 3.0989, "step": 2672 }, { "epoch": 0.911042944785276, "grad_norm": 16.202709702746365, "learning_rate": 8.79070544668684e-06, "loss": 3.5136, "step": 2673 }, { "epoch": 0.9113837764144512, "grad_norm": 14.108593067115311, "learning_rate": 8.789412001945501e-06, "loss": 3.0737, "step": 2674 }, { "epoch": 0.9117246080436264, "grad_norm": 18.884089862378104, "learning_rate": 8.788117961114571e-06, "loss": 3.5057, "step": 2675 }, { "epoch": 0.9120654396728016, "grad_norm": 13.962178225779653, "learning_rate": 8.786823324397606e-06, "loss": 3.0433, "step": 2676 }, { "epoch": 0.9124062713019768, "grad_norm": 15.059101769256207, "learning_rate": 8.785528091998258e-06, "loss": 3.2006, "step": 2677 }, { "epoch": 0.912747102931152, "grad_norm": 17.43805567503729, "learning_rate": 8.784232264120271e-06, "loss": 2.9184, "step": 2678 }, { "epoch": 0.9130879345603272, "grad_norm": 15.466016547319532, "learning_rate": 8.782935840967484e-06, "loss": 3.2058, "step": 2679 }, { "epoch": 0.9134287661895024, "grad_norm": 16.73806302869103, "learning_rate": 8.781638822743832e-06, "loss": 3.6101, "step": 2680 }, { "epoch": 0.9137695978186776, "grad_norm": 28.569871862204458, "learning_rate": 8.780341209653338e-06, "loss": 4.2538, "step": 2681 }, { "epoch": 0.9141104294478528, "grad_norm": 23.354157147359675, "learning_rate": 8.779043001900123e-06, "loss": 3.6315, "step": 2682 }, { "epoch": 0.9144512610770279, "grad_norm": 16.928938502856933, "learning_rate": 8.777744199688401e-06, "loss": 3.3808, "step": 2683 }, { "epoch": 0.9147920927062031, "grad_norm": 21.212963380276406, "learning_rate": 8.776444803222479e-06, "loss": 2.7492, "step": 2684 }, { "epoch": 0.9151329243353783, "grad_norm": 14.931545355197645, "learning_rate": 8.775144812706755e-06, "loss": 3.3506, "step": 2685 }, { "epoch": 0.9154737559645535, "grad_norm": 17.45626273219767, "learning_rate": 8.773844228345723e-06, "loss": 3.2479, "step": 2686 }, { "epoch": 0.9158145875937287, "grad_norm": 16.604700182268154, "learning_rate": 8.772543050343973e-06, "loss": 3.0862, "step": 2687 }, { "epoch": 0.9161554192229039, "grad_norm": 19.794854445518318, "learning_rate": 8.771241278906181e-06, "loss": 2.976, "step": 2688 }, { "epoch": 0.9164962508520791, "grad_norm": 11.841613925882248, "learning_rate": 8.769938914237127e-06, "loss": 2.255, "step": 2689 }, { "epoch": 0.9168370824812543, "grad_norm": 15.127237969462035, "learning_rate": 8.768635956541671e-06, "loss": 3.2708, "step": 2690 }, { "epoch": 0.9171779141104295, "grad_norm": 12.39247711522195, "learning_rate": 8.767332406024778e-06, "loss": 3.3598, "step": 2691 }, { "epoch": 0.9175187457396047, "grad_norm": 11.438859153788387, "learning_rate": 8.766028262891502e-06, "loss": 2.9445, "step": 2692 }, { "epoch": 0.9178595773687799, "grad_norm": 13.704188728985493, "learning_rate": 8.764723527346987e-06, "loss": 3.0133, "step": 2693 }, { "epoch": 0.918200408997955, "grad_norm": 27.923795480439864, "learning_rate": 8.763418199596476e-06, "loss": 3.0981, "step": 2694 }, { "epoch": 0.9185412406271302, "grad_norm": 27.89222571287306, "learning_rate": 8.762112279845301e-06, "loss": 3.0411, "step": 2695 }, { "epoch": 0.9188820722563054, "grad_norm": 17.081381969570064, "learning_rate": 8.760805768298888e-06, "loss": 3.0645, "step": 2696 }, { "epoch": 0.9192229038854806, "grad_norm": 13.462935900755797, "learning_rate": 8.759498665162755e-06, "loss": 3.0763, "step": 2697 }, { "epoch": 0.9195637355146558, "grad_norm": 15.352862861408548, "learning_rate": 8.758190970642518e-06, "loss": 3.1632, "step": 2698 }, { "epoch": 0.919904567143831, "grad_norm": 18.889489638635084, "learning_rate": 8.756882684943882e-06, "loss": 3.6544, "step": 2699 }, { "epoch": 0.9202453987730062, "grad_norm": 11.184898164431631, "learning_rate": 8.75557380827264e-06, "loss": 2.6107, "step": 2700 }, { "epoch": 0.9205862304021813, "grad_norm": 16.434723668721904, "learning_rate": 8.754264340834693e-06, "loss": 3.3371, "step": 2701 }, { "epoch": 0.9209270620313565, "grad_norm": 20.540947522442178, "learning_rate": 8.752954282836017e-06, "loss": 3.5615, "step": 2702 }, { "epoch": 0.9212678936605317, "grad_norm": 15.918671334533725, "learning_rate": 8.751643634482694e-06, "loss": 3.622, "step": 2703 }, { "epoch": 0.9216087252897068, "grad_norm": 18.41045783318474, "learning_rate": 8.750332395980894e-06, "loss": 3.6994, "step": 2704 }, { "epoch": 0.921949556918882, "grad_norm": 14.193659772694692, "learning_rate": 8.749020567536876e-06, "loss": 3.0117, "step": 2705 }, { "epoch": 0.9222903885480572, "grad_norm": 19.202775645818324, "learning_rate": 8.747708149357e-06, "loss": 3.6405, "step": 2706 }, { "epoch": 0.9226312201772324, "grad_norm": 19.771601071281292, "learning_rate": 8.746395141647715e-06, "loss": 3.7243, "step": 2707 }, { "epoch": 0.9229720518064076, "grad_norm": 18.844076701969367, "learning_rate": 8.745081544615558e-06, "loss": 3.4551, "step": 2708 }, { "epoch": 0.9233128834355828, "grad_norm": 15.583727724866202, "learning_rate": 8.743767358467166e-06, "loss": 3.5193, "step": 2709 }, { "epoch": 0.923653715064758, "grad_norm": 18.596674852453365, "learning_rate": 8.742452583409267e-06, "loss": 3.5285, "step": 2710 }, { "epoch": 0.9239945466939332, "grad_norm": 15.622796479760206, "learning_rate": 8.741137219648679e-06, "loss": 3.2276, "step": 2711 }, { "epoch": 0.9243353783231084, "grad_norm": 13.533598236824494, "learning_rate": 8.739821267392313e-06, "loss": 2.9683, "step": 2712 }, { "epoch": 0.9246762099522836, "grad_norm": 16.853232302027703, "learning_rate": 8.738504726847173e-06, "loss": 3.6098, "step": 2713 }, { "epoch": 0.9250170415814588, "grad_norm": 18.19295608536347, "learning_rate": 8.73718759822036e-06, "loss": 3.7979, "step": 2714 }, { "epoch": 0.9253578732106339, "grad_norm": 12.266374440986263, "learning_rate": 8.735869881719059e-06, "loss": 2.8422, "step": 2715 }, { "epoch": 0.9256987048398091, "grad_norm": 22.066630149072523, "learning_rate": 8.734551577550554e-06, "loss": 3.406, "step": 2716 }, { "epoch": 0.9260395364689843, "grad_norm": 19.703686450459372, "learning_rate": 8.73323268592222e-06, "loss": 3.3974, "step": 2717 }, { "epoch": 0.9263803680981595, "grad_norm": 13.97626936702887, "learning_rate": 8.731913207041523e-06, "loss": 3.2129, "step": 2718 }, { "epoch": 0.9267211997273347, "grad_norm": 14.229750218347712, "learning_rate": 8.730593141116023e-06, "loss": 3.1026, "step": 2719 }, { "epoch": 0.9270620313565099, "grad_norm": 13.293561825818738, "learning_rate": 8.729272488353371e-06, "loss": 3.1943, "step": 2720 }, { "epoch": 0.9274028629856851, "grad_norm": 13.035256160926515, "learning_rate": 8.727951248961312e-06, "loss": 2.7408, "step": 2721 }, { "epoch": 0.9277436946148603, "grad_norm": 14.915716848096986, "learning_rate": 8.72662942314768e-06, "loss": 3.275, "step": 2722 }, { "epoch": 0.9280845262440355, "grad_norm": 19.185895609534086, "learning_rate": 8.725307011120406e-06, "loss": 3.3366, "step": 2723 }, { "epoch": 0.9284253578732107, "grad_norm": 15.085483594969737, "learning_rate": 8.72398401308751e-06, "loss": 3.0332, "step": 2724 }, { "epoch": 0.9287661895023859, "grad_norm": 21.545592371847707, "learning_rate": 8.722660429257103e-06, "loss": 3.276, "step": 2725 }, { "epoch": 0.929107021131561, "grad_norm": 17.68774475685339, "learning_rate": 8.721336259837391e-06, "loss": 3.3784, "step": 2726 }, { "epoch": 0.9294478527607362, "grad_norm": 12.511006972962107, "learning_rate": 8.720011505036674e-06, "loss": 3.2995, "step": 2727 }, { "epoch": 0.9297886843899114, "grad_norm": 14.181119863045677, "learning_rate": 8.718686165063337e-06, "loss": 3.3101, "step": 2728 }, { "epoch": 0.9301295160190866, "grad_norm": 24.777411971917303, "learning_rate": 8.717360240125863e-06, "loss": 3.3187, "step": 2729 }, { "epoch": 0.9304703476482618, "grad_norm": 22.809261269472046, "learning_rate": 8.716033730432826e-06, "loss": 3.5693, "step": 2730 }, { "epoch": 0.930811179277437, "grad_norm": 36.59921946401887, "learning_rate": 8.714706636192892e-06, "loss": 3.401, "step": 2731 }, { "epoch": 0.9311520109066121, "grad_norm": 14.207790415825853, "learning_rate": 8.713378957614813e-06, "loss": 2.3107, "step": 2732 }, { "epoch": 0.9314928425357873, "grad_norm": 12.444214000057753, "learning_rate": 8.712050694907443e-06, "loss": 2.7456, "step": 2733 }, { "epoch": 0.9318336741649625, "grad_norm": 18.93552776140509, "learning_rate": 8.710721848279724e-06, "loss": 3.3428, "step": 2734 }, { "epoch": 0.9321745057941377, "grad_norm": 16.145907929162163, "learning_rate": 8.709392417940686e-06, "loss": 3.6313, "step": 2735 }, { "epoch": 0.9325153374233128, "grad_norm": 14.203083083293196, "learning_rate": 8.708062404099453e-06, "loss": 2.8628, "step": 2736 }, { "epoch": 0.932856169052488, "grad_norm": 12.18440033169147, "learning_rate": 8.706731806965244e-06, "loss": 2.965, "step": 2737 }, { "epoch": 0.9331970006816632, "grad_norm": 13.658952819883211, "learning_rate": 8.705400626747365e-06, "loss": 3.1434, "step": 2738 }, { "epoch": 0.9335378323108384, "grad_norm": 21.84118212201773, "learning_rate": 8.704068863655218e-06, "loss": 3.1146, "step": 2739 }, { "epoch": 0.9338786639400136, "grad_norm": 23.554320503840835, "learning_rate": 8.702736517898294e-06, "loss": 3.2486, "step": 2740 }, { "epoch": 0.9342194955691888, "grad_norm": 16.606611755516095, "learning_rate": 8.701403589686175e-06, "loss": 3.0872, "step": 2741 }, { "epoch": 0.934560327198364, "grad_norm": 18.685607040478704, "learning_rate": 8.700070079228537e-06, "loss": 3.4153, "step": 2742 }, { "epoch": 0.9349011588275392, "grad_norm": 35.15931435692519, "learning_rate": 8.698735986735148e-06, "loss": 4.0946, "step": 2743 }, { "epoch": 0.9352419904567144, "grad_norm": 14.370839468399605, "learning_rate": 8.697401312415863e-06, "loss": 3.1713, "step": 2744 }, { "epoch": 0.9355828220858896, "grad_norm": 17.09463103909829, "learning_rate": 8.696066056480633e-06, "loss": 3.6148, "step": 2745 }, { "epoch": 0.9359236537150648, "grad_norm": 22.230872435832456, "learning_rate": 8.6947302191395e-06, "loss": 3.6029, "step": 2746 }, { "epoch": 0.9362644853442399, "grad_norm": 16.04417335363527, "learning_rate": 8.693393800602594e-06, "loss": 3.4435, "step": 2747 }, { "epoch": 0.9366053169734151, "grad_norm": 16.284304007893816, "learning_rate": 8.692056801080143e-06, "loss": 2.6488, "step": 2748 }, { "epoch": 0.9369461486025903, "grad_norm": 11.445700258765418, "learning_rate": 8.690719220782458e-06, "loss": 2.3689, "step": 2749 }, { "epoch": 0.9372869802317655, "grad_norm": 17.7027424628809, "learning_rate": 8.689381059919949e-06, "loss": 3.3164, "step": 2750 }, { "epoch": 0.9376278118609407, "grad_norm": 15.125364443592483, "learning_rate": 8.688042318703111e-06, "loss": 3.4067, "step": 2751 }, { "epoch": 0.9379686434901159, "grad_norm": 15.873644132733526, "learning_rate": 8.686702997342538e-06, "loss": 3.415, "step": 2752 }, { "epoch": 0.9383094751192911, "grad_norm": 28.75980769495674, "learning_rate": 8.685363096048905e-06, "loss": 3.8048, "step": 2753 }, { "epoch": 0.9386503067484663, "grad_norm": 11.800571858379246, "learning_rate": 8.684022615032988e-06, "loss": 3.2646, "step": 2754 }, { "epoch": 0.9389911383776415, "grad_norm": 15.592548451780727, "learning_rate": 8.682681554505647e-06, "loss": 2.8499, "step": 2755 }, { "epoch": 0.9393319700068167, "grad_norm": 18.752358690905794, "learning_rate": 8.681339914677838e-06, "loss": 3.671, "step": 2756 }, { "epoch": 0.9396728016359919, "grad_norm": 18.08280658542198, "learning_rate": 8.679997695760606e-06, "loss": 3.1847, "step": 2757 }, { "epoch": 0.940013633265167, "grad_norm": 23.424175429876556, "learning_rate": 8.678654897965087e-06, "loss": 3.3421, "step": 2758 }, { "epoch": 0.9403544648943422, "grad_norm": 22.51120242730242, "learning_rate": 8.67731152150251e-06, "loss": 3.3113, "step": 2759 }, { "epoch": 0.9406952965235174, "grad_norm": 16.199334017975403, "learning_rate": 8.67596756658419e-06, "loss": 2.9616, "step": 2760 }, { "epoch": 0.9410361281526926, "grad_norm": 15.933501695689928, "learning_rate": 8.67462303342154e-06, "loss": 3.1875, "step": 2761 }, { "epoch": 0.9413769597818678, "grad_norm": 19.417927762783215, "learning_rate": 8.673277922226058e-06, "loss": 3.5381, "step": 2762 }, { "epoch": 0.941717791411043, "grad_norm": 18.158430632870704, "learning_rate": 8.671932233209338e-06, "loss": 3.1784, "step": 2763 }, { "epoch": 0.9420586230402181, "grad_norm": 18.198945425879312, "learning_rate": 8.670585966583059e-06, "loss": 3.4108, "step": 2764 }, { "epoch": 0.9423994546693933, "grad_norm": 19.265427772739574, "learning_rate": 8.669239122558996e-06, "loss": 3.1273, "step": 2765 }, { "epoch": 0.9427402862985685, "grad_norm": 16.406940735270705, "learning_rate": 8.667891701349014e-06, "loss": 3.1972, "step": 2766 }, { "epoch": 0.9430811179277437, "grad_norm": 16.183927657605565, "learning_rate": 8.666543703165065e-06, "loss": 3.0891, "step": 2767 }, { "epoch": 0.9434219495569189, "grad_norm": 14.948981902038565, "learning_rate": 8.665195128219197e-06, "loss": 3.4925, "step": 2768 }, { "epoch": 0.943762781186094, "grad_norm": 21.94712150785344, "learning_rate": 8.663845976723544e-06, "loss": 3.449, "step": 2769 }, { "epoch": 0.9441036128152692, "grad_norm": 16.179526533716203, "learning_rate": 8.662496248890333e-06, "loss": 3.3989, "step": 2770 }, { "epoch": 0.9444444444444444, "grad_norm": 14.886384930371841, "learning_rate": 8.661145944931884e-06, "loss": 3.4246, "step": 2771 }, { "epoch": 0.9447852760736196, "grad_norm": 13.871801816015386, "learning_rate": 8.659795065060603e-06, "loss": 3.016, "step": 2772 }, { "epoch": 0.9451261077027948, "grad_norm": 14.398798020441092, "learning_rate": 8.658443609488989e-06, "loss": 3.5073, "step": 2773 }, { "epoch": 0.94546693933197, "grad_norm": 12.433862849611405, "learning_rate": 8.657091578429632e-06, "loss": 3.0096, "step": 2774 }, { "epoch": 0.9458077709611452, "grad_norm": 16.096485777415598, "learning_rate": 8.655738972095214e-06, "loss": 3.4204, "step": 2775 }, { "epoch": 0.9461486025903204, "grad_norm": 15.210361184852108, "learning_rate": 8.6543857906985e-06, "loss": 3.0871, "step": 2776 }, { "epoch": 0.9464894342194956, "grad_norm": 17.54791898069647, "learning_rate": 8.653032034452356e-06, "loss": 3.8313, "step": 2777 }, { "epoch": 0.9468302658486708, "grad_norm": 16.327277131264548, "learning_rate": 8.65167770356973e-06, "loss": 3.4626, "step": 2778 }, { "epoch": 0.9471710974778459, "grad_norm": 16.533522416838554, "learning_rate": 8.650322798263664e-06, "loss": 3.166, "step": 2779 }, { "epoch": 0.9475119291070211, "grad_norm": 9.905318344692155, "learning_rate": 8.648967318747293e-06, "loss": 2.904, "step": 2780 }, { "epoch": 0.9478527607361963, "grad_norm": 16.51945061178983, "learning_rate": 8.647611265233836e-06, "loss": 3.5982, "step": 2781 }, { "epoch": 0.9481935923653715, "grad_norm": 18.722744250204418, "learning_rate": 8.646254637936606e-06, "loss": 3.6472, "step": 2782 }, { "epoch": 0.9485344239945467, "grad_norm": 17.55355286550268, "learning_rate": 8.644897437069007e-06, "loss": 3.5161, "step": 2783 }, { "epoch": 0.9488752556237219, "grad_norm": 16.187964154447286, "learning_rate": 8.643539662844531e-06, "loss": 3.1311, "step": 2784 }, { "epoch": 0.9492160872528971, "grad_norm": 16.21546403529476, "learning_rate": 8.642181315476764e-06, "loss": 3.0491, "step": 2785 }, { "epoch": 0.9495569188820723, "grad_norm": 17.23886591257753, "learning_rate": 8.640822395179377e-06, "loss": 3.5102, "step": 2786 }, { "epoch": 0.9498977505112475, "grad_norm": 18.12918007257363, "learning_rate": 8.63946290216613e-06, "loss": 3.6069, "step": 2787 }, { "epoch": 0.9502385821404227, "grad_norm": 20.17444667108366, "learning_rate": 8.638102836650885e-06, "loss": 3.6134, "step": 2788 }, { "epoch": 0.9505794137695979, "grad_norm": 31.810541330336193, "learning_rate": 8.636742198847579e-06, "loss": 3.2792, "step": 2789 }, { "epoch": 0.950920245398773, "grad_norm": 21.62079264536407, "learning_rate": 8.635380988970249e-06, "loss": 3.3417, "step": 2790 }, { "epoch": 0.9512610770279482, "grad_norm": 14.991020595826525, "learning_rate": 8.634019207233017e-06, "loss": 3.3229, "step": 2791 }, { "epoch": 0.9516019086571234, "grad_norm": 24.13921038669793, "learning_rate": 8.632656853850098e-06, "loss": 3.6314, "step": 2792 }, { "epoch": 0.9519427402862985, "grad_norm": 13.864205026343505, "learning_rate": 8.631293929035797e-06, "loss": 3.2958, "step": 2793 }, { "epoch": 0.9522835719154737, "grad_norm": 16.135151450139325, "learning_rate": 8.629930433004504e-06, "loss": 3.3525, "step": 2794 }, { "epoch": 0.952624403544649, "grad_norm": 13.530345918015703, "learning_rate": 8.628566365970703e-06, "loss": 3.1166, "step": 2795 }, { "epoch": 0.9529652351738241, "grad_norm": 13.395524803528065, "learning_rate": 8.62720172814897e-06, "loss": 3.031, "step": 2796 }, { "epoch": 0.9533060668029993, "grad_norm": 20.088889289219562, "learning_rate": 8.625836519753965e-06, "loss": 3.437, "step": 2797 }, { "epoch": 0.9536468984321745, "grad_norm": 16.8772019269885, "learning_rate": 8.624470741000443e-06, "loss": 3.3881, "step": 2798 }, { "epoch": 0.9539877300613497, "grad_norm": 12.476572434961101, "learning_rate": 8.623104392103243e-06, "loss": 2.7556, "step": 2799 }, { "epoch": 0.9543285616905249, "grad_norm": 28.22830494037833, "learning_rate": 8.621737473277301e-06, "loss": 3.1807, "step": 2800 }, { "epoch": 0.9546693933197, "grad_norm": 11.542727984623077, "learning_rate": 8.620369984737638e-06, "loss": 3.3033, "step": 2801 }, { "epoch": 0.9550102249488752, "grad_norm": 17.371922159090055, "learning_rate": 8.619001926699363e-06, "loss": 3.3585, "step": 2802 }, { "epoch": 0.9553510565780504, "grad_norm": 17.33519073412746, "learning_rate": 8.617633299377679e-06, "loss": 3.4994, "step": 2803 }, { "epoch": 0.9556918882072256, "grad_norm": 14.920739044936182, "learning_rate": 8.616264102987875e-06, "loss": 3.1437, "step": 2804 }, { "epoch": 0.9560327198364008, "grad_norm": 18.10337417746916, "learning_rate": 8.614894337745334e-06, "loss": 3.2551, "step": 2805 }, { "epoch": 0.956373551465576, "grad_norm": 18.483606071374336, "learning_rate": 8.613524003865523e-06, "loss": 3.4272, "step": 2806 }, { "epoch": 0.9567143830947512, "grad_norm": 19.349040179563065, "learning_rate": 8.612153101564e-06, "loss": 3.1107, "step": 2807 }, { "epoch": 0.9570552147239264, "grad_norm": 24.70065785818656, "learning_rate": 8.610781631056417e-06, "loss": 3.7859, "step": 2808 }, { "epoch": 0.9573960463531016, "grad_norm": 15.628566646374786, "learning_rate": 8.609409592558509e-06, "loss": 3.7556, "step": 2809 }, { "epoch": 0.9577368779822768, "grad_norm": 32.49965967039522, "learning_rate": 8.608036986286104e-06, "loss": 4.3735, "step": 2810 }, { "epoch": 0.9580777096114519, "grad_norm": 17.341015277489873, "learning_rate": 8.606663812455118e-06, "loss": 3.0412, "step": 2811 }, { "epoch": 0.9584185412406271, "grad_norm": 19.28534746699952, "learning_rate": 8.605290071281556e-06, "loss": 3.2379, "step": 2812 }, { "epoch": 0.9587593728698023, "grad_norm": 20.42147143675425, "learning_rate": 8.603915762981516e-06, "loss": 3.1202, "step": 2813 }, { "epoch": 0.9591002044989775, "grad_norm": 22.623211392126045, "learning_rate": 8.602540887771179e-06, "loss": 3.2372, "step": 2814 }, { "epoch": 0.9594410361281527, "grad_norm": 21.14254697777601, "learning_rate": 8.60116544586682e-06, "loss": 3.6696, "step": 2815 }, { "epoch": 0.9597818677573279, "grad_norm": 14.773857188665795, "learning_rate": 8.599789437484801e-06, "loss": 3.0701, "step": 2816 }, { "epoch": 0.9601226993865031, "grad_norm": 14.412174191576682, "learning_rate": 8.598412862841572e-06, "loss": 2.5487, "step": 2817 }, { "epoch": 0.9604635310156783, "grad_norm": 13.288714071078264, "learning_rate": 8.597035722153677e-06, "loss": 3.2021, "step": 2818 }, { "epoch": 0.9608043626448535, "grad_norm": 14.045592999443734, "learning_rate": 8.595658015637744e-06, "loss": 3.1294, "step": 2819 }, { "epoch": 0.9611451942740287, "grad_norm": 15.447504999895811, "learning_rate": 8.59427974351049e-06, "loss": 3.6895, "step": 2820 }, { "epoch": 0.9614860259032039, "grad_norm": 12.542480614008712, "learning_rate": 8.592900905988726e-06, "loss": 2.817, "step": 2821 }, { "epoch": 0.961826857532379, "grad_norm": 17.228522426485057, "learning_rate": 8.591521503289347e-06, "loss": 3.113, "step": 2822 }, { "epoch": 0.9621676891615542, "grad_norm": 25.144880355395923, "learning_rate": 8.590141535629338e-06, "loss": 3.6082, "step": 2823 }, { "epoch": 0.9625085207907293, "grad_norm": 17.27745823503467, "learning_rate": 8.588761003225772e-06, "loss": 3.5341, "step": 2824 }, { "epoch": 0.9628493524199045, "grad_norm": 18.39118197066289, "learning_rate": 8.587379906295817e-06, "loss": 3.5686, "step": 2825 }, { "epoch": 0.9631901840490797, "grad_norm": 13.146205143065188, "learning_rate": 8.585998245056721e-06, "loss": 3.2085, "step": 2826 }, { "epoch": 0.9635310156782549, "grad_norm": 25.776063887366817, "learning_rate": 8.584616019725826e-06, "loss": 3.1908, "step": 2827 }, { "epoch": 0.9638718473074301, "grad_norm": 23.965906389494332, "learning_rate": 8.583233230520562e-06, "loss": 3.1167, "step": 2828 }, { "epoch": 0.9642126789366053, "grad_norm": 16.25548217711139, "learning_rate": 8.581849877658445e-06, "loss": 3.2865, "step": 2829 }, { "epoch": 0.9645535105657805, "grad_norm": 14.881080416380206, "learning_rate": 8.580465961357087e-06, "loss": 3.2609, "step": 2830 }, { "epoch": 0.9648943421949557, "grad_norm": 43.1211620956575, "learning_rate": 8.579081481834177e-06, "loss": 3.4217, "step": 2831 }, { "epoch": 0.9652351738241309, "grad_norm": 16.068810100939025, "learning_rate": 8.577696439307506e-06, "loss": 3.2813, "step": 2832 }, { "epoch": 0.965576005453306, "grad_norm": 14.097840241761935, "learning_rate": 8.57631083399494e-06, "loss": 3.2195, "step": 2833 }, { "epoch": 0.9659168370824812, "grad_norm": 19.43650617921922, "learning_rate": 8.574924666114445e-06, "loss": 3.5986, "step": 2834 }, { "epoch": 0.9662576687116564, "grad_norm": 25.312086093731, "learning_rate": 8.573537935884067e-06, "loss": 3.8416, "step": 2835 }, { "epoch": 0.9665985003408316, "grad_norm": 24.261166727951263, "learning_rate": 8.572150643521949e-06, "loss": 3.6539, "step": 2836 }, { "epoch": 0.9669393319700068, "grad_norm": 13.29018363665877, "learning_rate": 8.570762789246314e-06, "loss": 3.0425, "step": 2837 }, { "epoch": 0.967280163599182, "grad_norm": 12.720455962080838, "learning_rate": 8.569374373275477e-06, "loss": 2.9033, "step": 2838 }, { "epoch": 0.9676209952283572, "grad_norm": 21.084955632282604, "learning_rate": 8.567985395827843e-06, "loss": 3.848, "step": 2839 }, { "epoch": 0.9679618268575324, "grad_norm": 19.519430392014677, "learning_rate": 8.566595857121902e-06, "loss": 2.5476, "step": 2840 }, { "epoch": 0.9683026584867076, "grad_norm": 16.45533444935172, "learning_rate": 8.565205757376235e-06, "loss": 3.6921, "step": 2841 }, { "epoch": 0.9686434901158828, "grad_norm": 17.016346178739973, "learning_rate": 8.563815096809512e-06, "loss": 3.1871, "step": 2842 }, { "epoch": 0.9689843217450579, "grad_norm": 12.110584699619704, "learning_rate": 8.562423875640485e-06, "loss": 3.3109, "step": 2843 }, { "epoch": 0.9693251533742331, "grad_norm": 19.45734810317632, "learning_rate": 8.561032094088002e-06, "loss": 3.1773, "step": 2844 }, { "epoch": 0.9696659850034083, "grad_norm": 17.766459954813246, "learning_rate": 8.559639752370997e-06, "loss": 3.6258, "step": 2845 }, { "epoch": 0.9700068166325835, "grad_norm": 16.937165269537452, "learning_rate": 8.558246850708485e-06, "loss": 3.152, "step": 2846 }, { "epoch": 0.9703476482617587, "grad_norm": 20.992291599324776, "learning_rate": 8.556853389319581e-06, "loss": 2.7195, "step": 2847 }, { "epoch": 0.9706884798909339, "grad_norm": 13.08818643863256, "learning_rate": 8.55545936842348e-06, "loss": 2.9693, "step": 2848 }, { "epoch": 0.9710293115201091, "grad_norm": 18.672059535501482, "learning_rate": 8.554064788239467e-06, "loss": 2.6755, "step": 2849 }, { "epoch": 0.9713701431492843, "grad_norm": 29.020310783601275, "learning_rate": 8.552669648986914e-06, "loss": 3.7968, "step": 2850 }, { "epoch": 0.9717109747784595, "grad_norm": 18.18303304751928, "learning_rate": 8.551273950885284e-06, "loss": 3.5419, "step": 2851 }, { "epoch": 0.9720518064076347, "grad_norm": 12.975828210593502, "learning_rate": 8.549877694154123e-06, "loss": 4.333, "step": 2852 }, { "epoch": 0.9723926380368099, "grad_norm": 29.32352474008414, "learning_rate": 8.54848087901307e-06, "loss": 3.1318, "step": 2853 }, { "epoch": 0.972733469665985, "grad_norm": 16.581712830546596, "learning_rate": 8.547083505681851e-06, "loss": 3.2858, "step": 2854 }, { "epoch": 0.9730743012951601, "grad_norm": 12.72710372072233, "learning_rate": 8.545685574380274e-06, "loss": 2.8104, "step": 2855 }, { "epoch": 0.9734151329243353, "grad_norm": 16.169013520474138, "learning_rate": 8.544287085328244e-06, "loss": 3.514, "step": 2856 }, { "epoch": 0.9737559645535105, "grad_norm": 17.275093009137397, "learning_rate": 8.542888038745745e-06, "loss": 3.4519, "step": 2857 }, { "epoch": 0.9740967961826857, "grad_norm": 14.065451497692326, "learning_rate": 8.541488434852854e-06, "loss": 2.6869, "step": 2858 }, { "epoch": 0.9744376278118609, "grad_norm": 23.742721073734234, "learning_rate": 8.540088273869735e-06, "loss": 3.7695, "step": 2859 }, { "epoch": 0.9747784594410361, "grad_norm": 13.588248602132353, "learning_rate": 8.538687556016638e-06, "loss": 2.9224, "step": 2860 }, { "epoch": 0.9751192910702113, "grad_norm": 14.900002884669592, "learning_rate": 8.537286281513901e-06, "loss": 3.5515, "step": 2861 }, { "epoch": 0.9754601226993865, "grad_norm": 16.134431053705402, "learning_rate": 8.53588445058195e-06, "loss": 3.5142, "step": 2862 }, { "epoch": 0.9758009543285617, "grad_norm": 10.883769333870887, "learning_rate": 8.534482063441303e-06, "loss": 3.0691, "step": 2863 }, { "epoch": 0.9761417859577369, "grad_norm": 13.600577558971208, "learning_rate": 8.533079120312554e-06, "loss": 2.7937, "step": 2864 }, { "epoch": 0.976482617586912, "grad_norm": 13.573475197972893, "learning_rate": 8.531675621416396e-06, "loss": 3.1008, "step": 2865 }, { "epoch": 0.9768234492160872, "grad_norm": 17.649423643282336, "learning_rate": 8.530271566973605e-06, "loss": 3.784, "step": 2866 }, { "epoch": 0.9771642808452624, "grad_norm": 14.960649376697887, "learning_rate": 8.528866957205041e-06, "loss": 2.6888, "step": 2867 }, { "epoch": 0.9775051124744376, "grad_norm": 16.23850326656891, "learning_rate": 8.527461792331659e-06, "loss": 3.4392, "step": 2868 }, { "epoch": 0.9778459441036128, "grad_norm": 17.200012591753573, "learning_rate": 8.526056072574495e-06, "loss": 3.2917, "step": 2869 }, { "epoch": 0.978186775732788, "grad_norm": 38.536473449098004, "learning_rate": 8.524649798154674e-06, "loss": 2.436, "step": 2870 }, { "epoch": 0.9785276073619632, "grad_norm": 20.5623174557345, "learning_rate": 8.523242969293408e-06, "loss": 2.8192, "step": 2871 }, { "epoch": 0.9788684389911384, "grad_norm": 24.979037942336948, "learning_rate": 8.521835586211998e-06, "loss": 3.7572, "step": 2872 }, { "epoch": 0.9792092706203136, "grad_norm": 9.606435350033108, "learning_rate": 8.520427649131833e-06, "loss": 2.8564, "step": 2873 }, { "epoch": 0.9795501022494888, "grad_norm": 19.990228436366976, "learning_rate": 8.519019158274382e-06, "loss": 2.9949, "step": 2874 }, { "epoch": 0.979890933878664, "grad_norm": 10.992664656057984, "learning_rate": 8.51761011386121e-06, "loss": 3.2158, "step": 2875 }, { "epoch": 0.9802317655078391, "grad_norm": 33.40465598857599, "learning_rate": 8.516200516113965e-06, "loss": 3.5498, "step": 2876 }, { "epoch": 0.9805725971370143, "grad_norm": 11.92556448221305, "learning_rate": 8.514790365254379e-06, "loss": 2.9747, "step": 2877 }, { "epoch": 0.9809134287661895, "grad_norm": 24.93437219490221, "learning_rate": 8.51337966150428e-06, "loss": 2.6377, "step": 2878 }, { "epoch": 0.9812542603953647, "grad_norm": 17.719848072683153, "learning_rate": 8.511968405085574e-06, "loss": 3.5006, "step": 2879 }, { "epoch": 0.9815950920245399, "grad_norm": 17.49859602308155, "learning_rate": 8.510556596220257e-06, "loss": 2.8587, "step": 2880 }, { "epoch": 0.9819359236537151, "grad_norm": 18.445338803475764, "learning_rate": 8.509144235130412e-06, "loss": 3.5915, "step": 2881 }, { "epoch": 0.9822767552828903, "grad_norm": 23.00355003264217, "learning_rate": 8.50773132203821e-06, "loss": 3.2127, "step": 2882 }, { "epoch": 0.9826175869120655, "grad_norm": 20.32650459532349, "learning_rate": 8.506317857165906e-06, "loss": 3.7823, "step": 2883 }, { "epoch": 0.9829584185412407, "grad_norm": 19.230098443090707, "learning_rate": 8.504903840735847e-06, "loss": 2.6148, "step": 2884 }, { "epoch": 0.9832992501704159, "grad_norm": 25.51961535534717, "learning_rate": 8.503489272970463e-06, "loss": 3.4922, "step": 2885 }, { "epoch": 0.983640081799591, "grad_norm": 28.534901733508026, "learning_rate": 8.502074154092268e-06, "loss": 3.3244, "step": 2886 }, { "epoch": 0.9839809134287661, "grad_norm": 12.550966436926648, "learning_rate": 8.500658484323868e-06, "loss": 3.0892, "step": 2887 }, { "epoch": 0.9843217450579413, "grad_norm": 16.253272217913988, "learning_rate": 8.499242263887953e-06, "loss": 3.7861, "step": 2888 }, { "epoch": 0.9846625766871165, "grad_norm": 16.6406372188896, "learning_rate": 8.4978254930073e-06, "loss": 3.6091, "step": 2889 }, { "epoch": 0.9850034083162917, "grad_norm": 11.43871572211583, "learning_rate": 8.496408171904773e-06, "loss": 3.1258, "step": 2890 }, { "epoch": 0.9853442399454669, "grad_norm": 13.619775771258272, "learning_rate": 8.494990300803323e-06, "loss": 3.0446, "step": 2891 }, { "epoch": 0.9856850715746421, "grad_norm": 30.91177081318645, "learning_rate": 8.493571879925986e-06, "loss": 3.3357, "step": 2892 }, { "epoch": 0.9860259032038173, "grad_norm": 11.886838084074983, "learning_rate": 8.492152909495883e-06, "loss": 3.1527, "step": 2893 }, { "epoch": 0.9863667348329925, "grad_norm": 21.1999653831076, "learning_rate": 8.490733389736227e-06, "loss": 3.5012, "step": 2894 }, { "epoch": 0.9867075664621677, "grad_norm": 18.61527971186803, "learning_rate": 8.489313320870313e-06, "loss": 3.7613, "step": 2895 }, { "epoch": 0.9870483980913429, "grad_norm": 24.84226702527376, "learning_rate": 8.487892703121525e-06, "loss": 3.642, "step": 2896 }, { "epoch": 0.987389229720518, "grad_norm": 12.852360192140019, "learning_rate": 8.486471536713328e-06, "loss": 2.6919, "step": 2897 }, { "epoch": 0.9877300613496932, "grad_norm": 12.885476803797896, "learning_rate": 8.485049821869282e-06, "loss": 3.1084, "step": 2898 }, { "epoch": 0.9880708929788684, "grad_norm": 17.997597498370826, "learning_rate": 8.483627558813023e-06, "loss": 3.4732, "step": 2899 }, { "epoch": 0.9884117246080436, "grad_norm": 14.358909573180082, "learning_rate": 8.482204747768283e-06, "loss": 3.5364, "step": 2900 }, { "epoch": 0.9887525562372188, "grad_norm": 15.392120553878687, "learning_rate": 8.480781388958874e-06, "loss": 3.1144, "step": 2901 }, { "epoch": 0.989093387866394, "grad_norm": 21.631958549697835, "learning_rate": 8.479357482608698e-06, "loss": 3.227, "step": 2902 }, { "epoch": 0.9894342194955692, "grad_norm": 14.790179902885969, "learning_rate": 8.477933028941738e-06, "loss": 2.9892, "step": 2903 }, { "epoch": 0.9897750511247444, "grad_norm": 16.929650054160785, "learning_rate": 8.476508028182068e-06, "loss": 3.6609, "step": 2904 }, { "epoch": 0.9901158827539196, "grad_norm": 13.301036879306281, "learning_rate": 8.475082480553845e-06, "loss": 2.2391, "step": 2905 }, { "epoch": 0.9904567143830948, "grad_norm": 20.154337392298867, "learning_rate": 8.473656386281315e-06, "loss": 4.0546, "step": 2906 }, { "epoch": 0.99079754601227, "grad_norm": 17.84756462943088, "learning_rate": 8.472229745588806e-06, "loss": 2.4679, "step": 2907 }, { "epoch": 0.9911383776414451, "grad_norm": 20.064756421277345, "learning_rate": 8.470802558700739e-06, "loss": 3.2025, "step": 2908 }, { "epoch": 0.9914792092706203, "grad_norm": 17.983850631707597, "learning_rate": 8.46937482584161e-06, "loss": 3.388, "step": 2909 }, { "epoch": 0.9918200408997955, "grad_norm": 20.03977673484492, "learning_rate": 8.467946547236013e-06, "loss": 3.3407, "step": 2910 }, { "epoch": 0.9921608725289707, "grad_norm": 14.867860162207226, "learning_rate": 8.466517723108618e-06, "loss": 3.2095, "step": 2911 }, { "epoch": 0.9925017041581459, "grad_norm": 27.1159081128393, "learning_rate": 8.465088353684186e-06, "loss": 3.2513, "step": 2912 }, { "epoch": 0.9928425357873211, "grad_norm": 19.577186027232766, "learning_rate": 8.46365843918756e-06, "loss": 2.9644, "step": 2913 }, { "epoch": 0.9931833674164963, "grad_norm": 17.43438646495004, "learning_rate": 8.462227979843675e-06, "loss": 3.2013, "step": 2914 }, { "epoch": 0.9935241990456715, "grad_norm": 19.519496729977845, "learning_rate": 8.460796975877547e-06, "loss": 3.8508, "step": 2915 }, { "epoch": 0.9938650306748467, "grad_norm": 14.94106355333602, "learning_rate": 8.459365427514278e-06, "loss": 3.4555, "step": 2916 }, { "epoch": 0.9942058623040219, "grad_norm": 18.060576882678305, "learning_rate": 8.457933334979057e-06, "loss": 3.2966, "step": 2917 }, { "epoch": 0.994546693933197, "grad_norm": 25.75220601130435, "learning_rate": 8.456500698497154e-06, "loss": 3.3007, "step": 2918 }, { "epoch": 0.9948875255623721, "grad_norm": 12.508177303606423, "learning_rate": 8.455067518293934e-06, "loss": 2.8538, "step": 2919 }, { "epoch": 0.9952283571915473, "grad_norm": 23.619607550320882, "learning_rate": 8.45363379459484e-06, "loss": 3.0823, "step": 2920 }, { "epoch": 0.9955691888207225, "grad_norm": 22.57181039332717, "learning_rate": 8.4521995276254e-06, "loss": 3.1306, "step": 2921 }, { "epoch": 0.9959100204498977, "grad_norm": 19.866340546346457, "learning_rate": 8.450764717611235e-06, "loss": 2.9677, "step": 2922 }, { "epoch": 0.9962508520790729, "grad_norm": 23.428682125495197, "learning_rate": 8.44932936477804e-06, "loss": 3.4723, "step": 2923 }, { "epoch": 0.9965916837082481, "grad_norm": 15.052286149566417, "learning_rate": 8.447893469351607e-06, "loss": 3.2926, "step": 2924 }, { "epoch": 0.9969325153374233, "grad_norm": 20.456393357248533, "learning_rate": 8.446457031557805e-06, "loss": 3.6024, "step": 2925 }, { "epoch": 0.9972733469665985, "grad_norm": 15.38630556060551, "learning_rate": 8.445020051622592e-06, "loss": 3.1217, "step": 2926 }, { "epoch": 0.9976141785957737, "grad_norm": 27.493943900141844, "learning_rate": 8.443582529772012e-06, "loss": 2.8535, "step": 2927 }, { "epoch": 0.9979550102249489, "grad_norm": 27.95479975603998, "learning_rate": 8.442144466232191e-06, "loss": 3.727, "step": 2928 }, { "epoch": 0.998295841854124, "grad_norm": 16.751768113758825, "learning_rate": 8.440705861229344e-06, "loss": 2.8237, "step": 2929 }, { "epoch": 0.9986366734832992, "grad_norm": 23.543302606910213, "learning_rate": 8.439266714989768e-06, "loss": 3.922, "step": 2930 }, { "epoch": 0.9989775051124744, "grad_norm": 15.002184047165716, "learning_rate": 8.437827027739848e-06, "loss": 2.9091, "step": 2931 }, { "epoch": 0.9993183367416496, "grad_norm": 14.93972205274078, "learning_rate": 8.43638679970605e-06, "loss": 3.03, "step": 2932 }, { "epoch": 0.9996591683708248, "grad_norm": 17.272998430086624, "learning_rate": 8.43494603111493e-06, "loss": 3.473, "step": 2933 }, { "epoch": 1.0, "grad_norm": 24.182192278620526, "learning_rate": 8.433504722193124e-06, "loss": 3.8168, "step": 2934 }, { "epoch": 1.000340831629175, "grad_norm": 16.870011694155398, "learning_rate": 8.432062873167357e-06, "loss": 2.3946, "step": 2935 }, { "epoch": 1.0006816632583504, "grad_norm": 15.45378259252126, "learning_rate": 8.43062048426444e-06, "loss": 3.0908, "step": 2936 }, { "epoch": 1.0010224948875255, "grad_norm": 11.436594610398576, "learning_rate": 8.429177555711263e-06, "loss": 2.1897, "step": 2937 }, { "epoch": 1.0013633265167008, "grad_norm": 16.925804087550564, "learning_rate": 8.427734087734806e-06, "loss": 2.5268, "step": 2938 }, { "epoch": 1.0017041581458759, "grad_norm": 10.936642764224866, "learning_rate": 8.426290080562132e-06, "loss": 2.4222, "step": 2939 }, { "epoch": 1.0020449897750512, "grad_norm": 27.38259886368823, "learning_rate": 8.424845534420389e-06, "loss": 2.0305, "step": 2940 }, { "epoch": 1.0023858214042263, "grad_norm": 11.719159609130703, "learning_rate": 8.423400449536809e-06, "loss": 1.9912, "step": 2941 }, { "epoch": 1.0027266530334016, "grad_norm": 10.675697988550219, "learning_rate": 8.421954826138712e-06, "loss": 1.9478, "step": 2942 }, { "epoch": 1.0030674846625767, "grad_norm": 19.456917261726804, "learning_rate": 8.420508664453497e-06, "loss": 1.9202, "step": 2943 }, { "epoch": 1.003408316291752, "grad_norm": 10.166978228025158, "learning_rate": 8.419061964708655e-06, "loss": 2.2339, "step": 2944 }, { "epoch": 1.003749147920927, "grad_norm": 13.900642311041366, "learning_rate": 8.417614727131753e-06, "loss": 2.7878, "step": 2945 }, { "epoch": 1.0040899795501022, "grad_norm": 16.068470277631796, "learning_rate": 8.416166951950452e-06, "loss": 2.5421, "step": 2946 }, { "epoch": 1.0044308111792775, "grad_norm": 11.435515601051637, "learning_rate": 8.41471863939249e-06, "loss": 2.5403, "step": 2947 }, { "epoch": 1.0047716428084525, "grad_norm": 18.08518657540011, "learning_rate": 8.413269789685693e-06, "loss": 2.2287, "step": 2948 }, { "epoch": 1.0051124744376279, "grad_norm": 15.212083433230257, "learning_rate": 8.41182040305797e-06, "loss": 2.2113, "step": 2949 }, { "epoch": 1.005453306066803, "grad_norm": 12.007250073913866, "learning_rate": 8.410370479737316e-06, "loss": 2.0771, "step": 2950 }, { "epoch": 1.0057941376959783, "grad_norm": 20.114843549987683, "learning_rate": 8.408920019951808e-06, "loss": 2.2451, "step": 2951 }, { "epoch": 1.0061349693251533, "grad_norm": 19.993448412036706, "learning_rate": 8.407469023929613e-06, "loss": 2.4454, "step": 2952 }, { "epoch": 1.0064758009543286, "grad_norm": 14.495211520181796, "learning_rate": 8.406017491898975e-06, "loss": 2.5565, "step": 2953 }, { "epoch": 1.0068166325835037, "grad_norm": 20.145187321678865, "learning_rate": 8.404565424088226e-06, "loss": 1.6723, "step": 2954 }, { "epoch": 1.007157464212679, "grad_norm": 15.202571997451127, "learning_rate": 8.403112820725787e-06, "loss": 2.0155, "step": 2955 }, { "epoch": 1.0074982958418541, "grad_norm": 16.25349625331195, "learning_rate": 8.40165968204015e-06, "loss": 1.8035, "step": 2956 }, { "epoch": 1.0078391274710292, "grad_norm": 16.56891070562071, "learning_rate": 8.400206008259906e-06, "loss": 2.495, "step": 2957 }, { "epoch": 1.0081799591002045, "grad_norm": 14.020564107765805, "learning_rate": 8.398751799613723e-06, "loss": 2.1595, "step": 2958 }, { "epoch": 1.0085207907293796, "grad_norm": 23.990834631503944, "learning_rate": 8.39729705633035e-06, "loss": 3.113, "step": 2959 }, { "epoch": 1.008861622358555, "grad_norm": 13.012646640483593, "learning_rate": 8.395841778638625e-06, "loss": 2.5958, "step": 2960 }, { "epoch": 1.00920245398773, "grad_norm": 16.149216876893217, "learning_rate": 8.394385966767473e-06, "loss": 2.0657, "step": 2961 }, { "epoch": 1.0095432856169053, "grad_norm": 17.8509695216834, "learning_rate": 8.392929620945893e-06, "loss": 2.2783, "step": 2962 }, { "epoch": 1.0098841172460804, "grad_norm": 19.17849910347601, "learning_rate": 8.39147274140298e-06, "loss": 2.4315, "step": 2963 }, { "epoch": 1.0102249488752557, "grad_norm": 15.403360051243343, "learning_rate": 8.390015328367903e-06, "loss": 2.4307, "step": 2964 }, { "epoch": 1.0105657805044308, "grad_norm": 15.282855319669597, "learning_rate": 8.38855738206992e-06, "loss": 2.5902, "step": 2965 }, { "epoch": 1.010906612133606, "grad_norm": 12.45547651457662, "learning_rate": 8.387098902738371e-06, "loss": 2.3073, "step": 2966 }, { "epoch": 1.0112474437627812, "grad_norm": 28.854770704634024, "learning_rate": 8.385639890602682e-06, "loss": 2.5377, "step": 2967 }, { "epoch": 1.0115882753919563, "grad_norm": 20.287647826450243, "learning_rate": 8.38418034589236e-06, "loss": 2.8899, "step": 2968 }, { "epoch": 1.0119291070211316, "grad_norm": 13.308280237858709, "learning_rate": 8.382720268837e-06, "loss": 2.8145, "step": 2969 }, { "epoch": 1.0122699386503067, "grad_norm": 19.532227245613758, "learning_rate": 8.381259659666274e-06, "loss": 2.7608, "step": 2970 }, { "epoch": 1.012610770279482, "grad_norm": 15.327175206797714, "learning_rate": 8.379798518609946e-06, "loss": 2.3319, "step": 2971 }, { "epoch": 1.012951601908657, "grad_norm": 17.9940879384752, "learning_rate": 8.378336845897855e-06, "loss": 2.3474, "step": 2972 }, { "epoch": 1.0132924335378324, "grad_norm": 13.345762996994136, "learning_rate": 8.376874641759928e-06, "loss": 2.4487, "step": 2973 }, { "epoch": 1.0136332651670075, "grad_norm": 14.429591322414812, "learning_rate": 8.375411906426179e-06, "loss": 2.6001, "step": 2974 }, { "epoch": 1.0139740967961828, "grad_norm": 15.791211626973098, "learning_rate": 8.3739486401267e-06, "loss": 2.4211, "step": 2975 }, { "epoch": 1.0143149284253579, "grad_norm": 14.140575515538307, "learning_rate": 8.37248484309167e-06, "loss": 2.0912, "step": 2976 }, { "epoch": 1.014655760054533, "grad_norm": 15.035755659838799, "learning_rate": 8.37102051555135e-06, "loss": 2.6132, "step": 2977 }, { "epoch": 1.0149965916837083, "grad_norm": 8.884969883377437, "learning_rate": 8.369555657736082e-06, "loss": 1.6373, "step": 2978 }, { "epoch": 1.0153374233128833, "grad_norm": 17.02520619141162, "learning_rate": 8.368090269876294e-06, "loss": 2.5518, "step": 2979 }, { "epoch": 1.0156782549420587, "grad_norm": 12.37932527232506, "learning_rate": 8.366624352202501e-06, "loss": 1.9395, "step": 2980 }, { "epoch": 1.0160190865712337, "grad_norm": 38.663894152801625, "learning_rate": 8.365157904945296e-06, "loss": 2.2597, "step": 2981 }, { "epoch": 1.016359918200409, "grad_norm": 13.781998441443758, "learning_rate": 8.363690928335356e-06, "loss": 1.9735, "step": 2982 }, { "epoch": 1.0167007498295841, "grad_norm": 23.563536883980035, "learning_rate": 8.362223422603442e-06, "loss": 1.8197, "step": 2983 }, { "epoch": 1.0170415814587594, "grad_norm": 18.37201266737557, "learning_rate": 8.360755387980402e-06, "loss": 2.2621, "step": 2984 }, { "epoch": 1.0173824130879345, "grad_norm": 22.031476829186673, "learning_rate": 8.35928682469716e-06, "loss": 2.174, "step": 2985 }, { "epoch": 1.0177232447171098, "grad_norm": 16.59961550867403, "learning_rate": 8.357817732984726e-06, "loss": 2.3534, "step": 2986 }, { "epoch": 1.018064076346285, "grad_norm": 14.419030983901294, "learning_rate": 8.356348113074197e-06, "loss": 2.8242, "step": 2987 }, { "epoch": 1.01840490797546, "grad_norm": 15.918666933985195, "learning_rate": 8.35487796519675e-06, "loss": 2.2908, "step": 2988 }, { "epoch": 1.0187457396046353, "grad_norm": 21.98539237098907, "learning_rate": 8.353407289583644e-06, "loss": 2.1171, "step": 2989 }, { "epoch": 1.0190865712338104, "grad_norm": 15.929630028381956, "learning_rate": 8.35193608646622e-06, "loss": 2.3836, "step": 2990 }, { "epoch": 1.0194274028629857, "grad_norm": 17.434592350149966, "learning_rate": 8.350464356075908e-06, "loss": 2.2862, "step": 2991 }, { "epoch": 1.0197682344921608, "grad_norm": 18.652329104261216, "learning_rate": 8.348992098644216e-06, "loss": 2.5439, "step": 2992 }, { "epoch": 1.0201090661213361, "grad_norm": 17.068499944435413, "learning_rate": 8.347519314402734e-06, "loss": 2.1013, "step": 2993 }, { "epoch": 1.0204498977505112, "grad_norm": 16.910992019311166, "learning_rate": 8.346046003583138e-06, "loss": 2.7617, "step": 2994 }, { "epoch": 1.0207907293796865, "grad_norm": 11.663874612756022, "learning_rate": 8.344572166417186e-06, "loss": 1.8726, "step": 2995 }, { "epoch": 1.0211315610088616, "grad_norm": 17.185465730930233, "learning_rate": 8.34309780313672e-06, "loss": 2.9399, "step": 2996 }, { "epoch": 1.021472392638037, "grad_norm": 23.51640520313903, "learning_rate": 8.34162291397366e-06, "loss": 2.3387, "step": 2997 }, { "epoch": 1.021813224267212, "grad_norm": 14.337619550104389, "learning_rate": 8.340147499160014e-06, "loss": 1.7297, "step": 2998 }, { "epoch": 1.022154055896387, "grad_norm": 12.20701893107469, "learning_rate": 8.33867155892787e-06, "loss": 2.1076, "step": 2999 }, { "epoch": 1.0224948875255624, "grad_norm": 19.945522742097726, "learning_rate": 8.337195093509397e-06, "loss": 1.1566, "step": 3000 }, { "epoch": 1.0228357191547375, "grad_norm": 19.078014494751812, "learning_rate": 8.335718103136854e-06, "loss": 2.5271, "step": 3001 }, { "epoch": 1.0231765507839128, "grad_norm": 14.209076050734804, "learning_rate": 8.334240588042573e-06, "loss": 2.9133, "step": 3002 }, { "epoch": 1.0235173824130879, "grad_norm": 14.115100055936848, "learning_rate": 8.332762548458975e-06, "loss": 2.1811, "step": 3003 }, { "epoch": 1.0238582140422632, "grad_norm": 22.729049435929607, "learning_rate": 8.331283984618562e-06, "loss": 3.0598, "step": 3004 }, { "epoch": 1.0241990456714383, "grad_norm": 12.312665406876421, "learning_rate": 8.329804896753917e-06, "loss": 1.7742, "step": 3005 }, { "epoch": 1.0245398773006136, "grad_norm": 14.63908031957038, "learning_rate": 8.328325285097707e-06, "loss": 2.1559, "step": 3006 }, { "epoch": 1.0248807089297887, "grad_norm": 13.028367967791633, "learning_rate": 8.32684514988268e-06, "loss": 2.0566, "step": 3007 }, { "epoch": 1.025221540558964, "grad_norm": 14.490406216923038, "learning_rate": 8.325364491341666e-06, "loss": 1.7624, "step": 3008 }, { "epoch": 1.025562372188139, "grad_norm": 21.799879219927558, "learning_rate": 8.32388330970758e-06, "loss": 1.8941, "step": 3009 }, { "epoch": 1.0259032038173141, "grad_norm": 15.598456944995535, "learning_rate": 8.32240160521342e-06, "loss": 2.3004, "step": 3010 }, { "epoch": 1.0262440354464895, "grad_norm": 14.566478606049442, "learning_rate": 8.32091937809226e-06, "loss": 2.0838, "step": 3011 }, { "epoch": 1.0265848670756645, "grad_norm": 14.074719088265235, "learning_rate": 8.319436628577262e-06, "loss": 2.4881, "step": 3012 }, { "epoch": 1.0269256987048399, "grad_norm": 15.744788862216913, "learning_rate": 8.31795335690167e-06, "loss": 2.1748, "step": 3013 }, { "epoch": 1.027266530334015, "grad_norm": 13.7999587978313, "learning_rate": 8.316469563298805e-06, "loss": 2.1522, "step": 3014 }, { "epoch": 1.0276073619631902, "grad_norm": 11.462483224095424, "learning_rate": 8.314985248002077e-06, "loss": 1.8285, "step": 3015 }, { "epoch": 1.0279481935923653, "grad_norm": 20.105956892459854, "learning_rate": 8.313500411244973e-06, "loss": 2.2915, "step": 3016 }, { "epoch": 1.0282890252215406, "grad_norm": 17.332264797104557, "learning_rate": 8.312015053261065e-06, "loss": 2.5517, "step": 3017 }, { "epoch": 1.0286298568507157, "grad_norm": 25.736804068350896, "learning_rate": 8.310529174284004e-06, "loss": 2.5159, "step": 3018 }, { "epoch": 1.028970688479891, "grad_norm": 18.15206055189928, "learning_rate": 8.309042774547525e-06, "loss": 2.4168, "step": 3019 }, { "epoch": 1.0293115201090661, "grad_norm": 19.642150752155256, "learning_rate": 8.307555854285445e-06, "loss": 2.4011, "step": 3020 }, { "epoch": 1.0296523517382412, "grad_norm": 18.954346281667657, "learning_rate": 8.306068413731662e-06, "loss": 2.653, "step": 3021 }, { "epoch": 1.0299931833674165, "grad_norm": 13.839428774791044, "learning_rate": 8.304580453120157e-06, "loss": 2.2454, "step": 3022 }, { "epoch": 1.0303340149965916, "grad_norm": 14.938233306906922, "learning_rate": 8.303091972684993e-06, "loss": 1.8844, "step": 3023 }, { "epoch": 1.030674846625767, "grad_norm": 20.15701905690113, "learning_rate": 8.301602972660313e-06, "loss": 2.4438, "step": 3024 }, { "epoch": 1.031015678254942, "grad_norm": 19.042476037802064, "learning_rate": 8.30011345328034e-06, "loss": 1.968, "step": 3025 }, { "epoch": 1.0313565098841173, "grad_norm": 15.821410892679895, "learning_rate": 8.298623414779386e-06, "loss": 2.3062, "step": 3026 }, { "epoch": 1.0316973415132924, "grad_norm": 27.05541356306815, "learning_rate": 8.297132857391836e-06, "loss": 2.509, "step": 3027 }, { "epoch": 1.0320381731424677, "grad_norm": 15.850551969880144, "learning_rate": 8.295641781352165e-06, "loss": 2.0777, "step": 3028 }, { "epoch": 1.0323790047716428, "grad_norm": 14.96001653023231, "learning_rate": 8.294150186894922e-06, "loss": 2.3675, "step": 3029 }, { "epoch": 1.032719836400818, "grad_norm": 27.89636177307034, "learning_rate": 8.29265807425474e-06, "loss": 2.4957, "step": 3030 }, { "epoch": 1.0330606680299932, "grad_norm": 16.92886848697703, "learning_rate": 8.291165443666336e-06, "loss": 2.215, "step": 3031 }, { "epoch": 1.0334014996591683, "grad_norm": 16.741689227119593, "learning_rate": 8.289672295364507e-06, "loss": 2.2491, "step": 3032 }, { "epoch": 1.0337423312883436, "grad_norm": 18.36470972359791, "learning_rate": 8.288178629584131e-06, "loss": 1.804, "step": 3033 }, { "epoch": 1.0340831629175187, "grad_norm": 16.696433116735296, "learning_rate": 8.286684446560165e-06, "loss": 2.0143, "step": 3034 }, { "epoch": 1.034423994546694, "grad_norm": 17.29207957921798, "learning_rate": 8.285189746527653e-06, "loss": 1.4633, "step": 3035 }, { "epoch": 1.034764826175869, "grad_norm": 29.185405326344267, "learning_rate": 8.283694529721717e-06, "loss": 2.0241, "step": 3036 }, { "epoch": 1.0351056578050444, "grad_norm": 19.333599457372, "learning_rate": 8.282198796377561e-06, "loss": 1.717, "step": 3037 }, { "epoch": 1.0354464894342195, "grad_norm": 17.57626224692412, "learning_rate": 8.280702546730466e-06, "loss": 2.3111, "step": 3038 }, { "epoch": 1.0357873210633948, "grad_norm": 15.01687174716507, "learning_rate": 8.279205781015801e-06, "loss": 2.4606, "step": 3039 }, { "epoch": 1.0361281526925699, "grad_norm": 39.19575455555872, "learning_rate": 8.277708499469016e-06, "loss": 2.0121, "step": 3040 }, { "epoch": 1.0364689843217452, "grad_norm": 14.732989123613407, "learning_rate": 8.276210702325631e-06, "loss": 2.0396, "step": 3041 }, { "epoch": 1.0368098159509203, "grad_norm": 21.350769311608744, "learning_rate": 8.274712389821264e-06, "loss": 2.4751, "step": 3042 }, { "epoch": 1.0371506475800953, "grad_norm": 19.892160191363168, "learning_rate": 8.273213562191603e-06, "loss": 2.8179, "step": 3043 }, { "epoch": 1.0374914792092707, "grad_norm": 14.921919109080436, "learning_rate": 8.271714219672417e-06, "loss": 2.5831, "step": 3044 }, { "epoch": 1.0378323108384457, "grad_norm": 13.297269986800188, "learning_rate": 8.27021436249956e-06, "loss": 2.2095, "step": 3045 }, { "epoch": 1.038173142467621, "grad_norm": 16.456075493483166, "learning_rate": 8.268713990908967e-06, "loss": 2.1204, "step": 3046 }, { "epoch": 1.0385139740967961, "grad_norm": 17.29213970067709, "learning_rate": 8.267213105136652e-06, "loss": 1.7629, "step": 3047 }, { "epoch": 1.0388548057259714, "grad_norm": 15.555417633395297, "learning_rate": 8.265711705418706e-06, "loss": 2.6193, "step": 3048 }, { "epoch": 1.0391956373551465, "grad_norm": 15.99762491491181, "learning_rate": 8.264209791991312e-06, "loss": 2.1822, "step": 3049 }, { "epoch": 1.0395364689843218, "grad_norm": 175.37585414932568, "learning_rate": 8.262707365090723e-06, "loss": 2.1971, "step": 3050 }, { "epoch": 1.039877300613497, "grad_norm": 24.310899222953914, "learning_rate": 8.261204424953278e-06, "loss": 2.3931, "step": 3051 }, { "epoch": 1.0402181322426722, "grad_norm": 32.42973777525591, "learning_rate": 8.259700971815393e-06, "loss": 1.8215, "step": 3052 }, { "epoch": 1.0405589638718473, "grad_norm": 17.883271855069403, "learning_rate": 8.258197005913569e-06, "loss": 1.8461, "step": 3053 }, { "epoch": 1.0408997955010224, "grad_norm": 22.660466018032217, "learning_rate": 8.256692527484387e-06, "loss": 2.0931, "step": 3054 }, { "epoch": 1.0412406271301977, "grad_norm": 19.92534481769709, "learning_rate": 8.255187536764506e-06, "loss": 2.7074, "step": 3055 }, { "epoch": 1.0415814587593728, "grad_norm": 16.47940260883423, "learning_rate": 8.25368203399067e-06, "loss": 2.5494, "step": 3056 }, { "epoch": 1.0419222903885481, "grad_norm": 11.898123259954165, "learning_rate": 8.252176019399694e-06, "loss": 1.8906, "step": 3057 }, { "epoch": 1.0422631220177232, "grad_norm": 19.460783811219137, "learning_rate": 8.250669493228485e-06, "loss": 2.3604, "step": 3058 }, { "epoch": 1.0426039536468985, "grad_norm": 16.434052869889303, "learning_rate": 8.249162455714024e-06, "loss": 2.2501, "step": 3059 }, { "epoch": 1.0429447852760736, "grad_norm": 15.397466721566715, "learning_rate": 8.247654907093377e-06, "loss": 1.9695, "step": 3060 }, { "epoch": 1.043285616905249, "grad_norm": 18.73175844908087, "learning_rate": 8.246146847603683e-06, "loss": 1.7781, "step": 3061 }, { "epoch": 1.043626448534424, "grad_norm": 12.85943596935143, "learning_rate": 8.244638277482167e-06, "loss": 2.245, "step": 3062 }, { "epoch": 1.043967280163599, "grad_norm": 12.659537510025315, "learning_rate": 8.243129196966135e-06, "loss": 1.8459, "step": 3063 }, { "epoch": 1.0443081117927744, "grad_norm": 25.71753211914107, "learning_rate": 8.24161960629297e-06, "loss": 2.8123, "step": 3064 }, { "epoch": 1.0446489434219495, "grad_norm": 25.319083245912555, "learning_rate": 8.240109505700137e-06, "loss": 2.384, "step": 3065 }, { "epoch": 1.0449897750511248, "grad_norm": 11.222100149877608, "learning_rate": 8.23859889542518e-06, "loss": 2.1389, "step": 3066 }, { "epoch": 1.0453306066802999, "grad_norm": 13.376979373234589, "learning_rate": 8.237087775705725e-06, "loss": 1.9939, "step": 3067 }, { "epoch": 1.0456714383094752, "grad_norm": 13.333347281002322, "learning_rate": 8.235576146779479e-06, "loss": 2.5065, "step": 3068 }, { "epoch": 1.0460122699386503, "grad_norm": 11.79330833163563, "learning_rate": 8.234064008884221e-06, "loss": 2.0235, "step": 3069 }, { "epoch": 1.0463531015678256, "grad_norm": 18.378096754268437, "learning_rate": 8.232551362257822e-06, "loss": 1.956, "step": 3070 }, { "epoch": 1.0466939331970007, "grad_norm": 12.615913099361963, "learning_rate": 8.231038207138226e-06, "loss": 1.8079, "step": 3071 }, { "epoch": 1.047034764826176, "grad_norm": 20.89480984488167, "learning_rate": 8.229524543763458e-06, "loss": 1.8613, "step": 3072 }, { "epoch": 1.047375596455351, "grad_norm": 17.762584047722363, "learning_rate": 8.228010372371624e-06, "loss": 2.5874, "step": 3073 }, { "epoch": 1.0477164280845261, "grad_norm": 18.70707792620371, "learning_rate": 8.226495693200906e-06, "loss": 2.2664, "step": 3074 }, { "epoch": 1.0480572597137015, "grad_norm": 13.9576596780897, "learning_rate": 8.224980506489573e-06, "loss": 1.885, "step": 3075 }, { "epoch": 1.0483980913428765, "grad_norm": 13.762414859366428, "learning_rate": 8.223464812475968e-06, "loss": 2.3124, "step": 3076 }, { "epoch": 1.0487389229720518, "grad_norm": 22.720594171765338, "learning_rate": 8.221948611398518e-06, "loss": 1.8868, "step": 3077 }, { "epoch": 1.049079754601227, "grad_norm": 12.771370687025618, "learning_rate": 8.220431903495725e-06, "loss": 1.971, "step": 3078 }, { "epoch": 1.0494205862304022, "grad_norm": 11.024773695045058, "learning_rate": 8.218914689006173e-06, "loss": 1.2333, "step": 3079 }, { "epoch": 1.0497614178595773, "grad_norm": 25.89196536520435, "learning_rate": 8.217396968168526e-06, "loss": 2.2958, "step": 3080 }, { "epoch": 1.0501022494887526, "grad_norm": 14.675102868463854, "learning_rate": 8.215878741221527e-06, "loss": 1.3539, "step": 3081 }, { "epoch": 1.0504430811179277, "grad_norm": 13.623436268438427, "learning_rate": 8.214360008404004e-06, "loss": 2.2616, "step": 3082 }, { "epoch": 1.050783912747103, "grad_norm": 20.58460107592449, "learning_rate": 8.212840769954855e-06, "loss": 2.0933, "step": 3083 }, { "epoch": 1.0511247443762781, "grad_norm": 15.375796267023995, "learning_rate": 8.211321026113061e-06, "loss": 2.6491, "step": 3084 }, { "epoch": 1.0514655760054532, "grad_norm": 21.247743340922828, "learning_rate": 8.20980077711769e-06, "loss": 3.327, "step": 3085 }, { "epoch": 1.0518064076346285, "grad_norm": 22.528173769699457, "learning_rate": 8.208280023207875e-06, "loss": 1.9887, "step": 3086 }, { "epoch": 1.0521472392638036, "grad_norm": 15.12423234545914, "learning_rate": 8.206758764622843e-06, "loss": 2.0256, "step": 3087 }, { "epoch": 1.052488070892979, "grad_norm": 14.04171805336934, "learning_rate": 8.205237001601892e-06, "loss": 2.3245, "step": 3088 }, { "epoch": 1.052828902522154, "grad_norm": 14.901915791061269, "learning_rate": 8.203714734384402e-06, "loss": 2.06, "step": 3089 }, { "epoch": 1.0531697341513293, "grad_norm": 13.384545382799981, "learning_rate": 8.20219196320983e-06, "loss": 2.4424, "step": 3090 }, { "epoch": 1.0535105657805044, "grad_norm": 12.349643604676519, "learning_rate": 8.200668688317714e-06, "loss": 1.8983, "step": 3091 }, { "epoch": 1.0538513974096797, "grad_norm": 20.40328846007424, "learning_rate": 8.199144909947674e-06, "loss": 2.5484, "step": 3092 }, { "epoch": 1.0541922290388548, "grad_norm": 15.328369508663503, "learning_rate": 8.197620628339404e-06, "loss": 2.3304, "step": 3093 }, { "epoch": 1.05453306066803, "grad_norm": 14.071576763447034, "learning_rate": 8.19609584373268e-06, "loss": 2.3661, "step": 3094 }, { "epoch": 1.0548738922972052, "grad_norm": 17.135689142809646, "learning_rate": 8.194570556367356e-06, "loss": 1.6676, "step": 3095 }, { "epoch": 1.0552147239263803, "grad_norm": 16.375276998764477, "learning_rate": 8.19304476648337e-06, "loss": 2.3436, "step": 3096 }, { "epoch": 1.0555555555555556, "grad_norm": 14.446102849696802, "learning_rate": 8.191518474320727e-06, "loss": 2.6259, "step": 3097 }, { "epoch": 1.0558963871847307, "grad_norm": 13.177257196662163, "learning_rate": 8.189991680119526e-06, "loss": 2.6169, "step": 3098 }, { "epoch": 1.056237218813906, "grad_norm": 16.105307990036593, "learning_rate": 8.188464384119936e-06, "loss": 2.3657, "step": 3099 }, { "epoch": 1.056578050443081, "grad_norm": 16.362586551654793, "learning_rate": 8.186936586562203e-06, "loss": 2.3062, "step": 3100 }, { "epoch": 1.0569188820722564, "grad_norm": 30.489239018594386, "learning_rate": 8.18540828768666e-06, "loss": 2.573, "step": 3101 }, { "epoch": 1.0572597137014315, "grad_norm": 14.538415121204771, "learning_rate": 8.183879487733713e-06, "loss": 2.9195, "step": 3102 }, { "epoch": 1.0576005453306068, "grad_norm": 19.212063214858837, "learning_rate": 8.182350186943849e-06, "loss": 2.9697, "step": 3103 }, { "epoch": 1.0579413769597819, "grad_norm": 14.010212454036367, "learning_rate": 8.18082038555763e-06, "loss": 2.4357, "step": 3104 }, { "epoch": 1.058282208588957, "grad_norm": 15.292007301305084, "learning_rate": 8.179290083815705e-06, "loss": 2.1053, "step": 3105 }, { "epoch": 1.0586230402181322, "grad_norm": 21.748373388893622, "learning_rate": 8.177759281958792e-06, "loss": 1.636, "step": 3106 }, { "epoch": 1.0589638718473073, "grad_norm": 22.018796826211727, "learning_rate": 8.176227980227693e-06, "loss": 2.8344, "step": 3107 }, { "epoch": 1.0593047034764826, "grad_norm": 19.024505779983993, "learning_rate": 8.174696178863291e-06, "loss": 2.421, "step": 3108 }, { "epoch": 1.0596455351056577, "grad_norm": 13.672222687232669, "learning_rate": 8.173163878106543e-06, "loss": 2.1616, "step": 3109 }, { "epoch": 1.059986366734833, "grad_norm": 15.25692059145895, "learning_rate": 8.171631078198483e-06, "loss": 1.9193, "step": 3110 }, { "epoch": 1.0603271983640081, "grad_norm": 22.784208982359797, "learning_rate": 8.170097779380231e-06, "loss": 2.2727, "step": 3111 }, { "epoch": 1.0606680299931834, "grad_norm": 22.044856795176475, "learning_rate": 8.168563981892977e-06, "loss": 2.5161, "step": 3112 }, { "epoch": 1.0610088616223585, "grad_norm": 16.659562410192517, "learning_rate": 8.167029685977995e-06, "loss": 2.5196, "step": 3113 }, { "epoch": 1.0613496932515338, "grad_norm": 27.850046323118775, "learning_rate": 8.165494891876637e-06, "loss": 2.2761, "step": 3114 }, { "epoch": 1.061690524880709, "grad_norm": 15.914888722676201, "learning_rate": 8.163959599830333e-06, "loss": 2.2639, "step": 3115 }, { "epoch": 1.062031356509884, "grad_norm": 16.791400288915664, "learning_rate": 8.162423810080587e-06, "loss": 1.9989, "step": 3116 }, { "epoch": 1.0623721881390593, "grad_norm": 16.48877382215794, "learning_rate": 8.160887522868988e-06, "loss": 1.7909, "step": 3117 }, { "epoch": 1.0627130197682344, "grad_norm": 22.309940422078828, "learning_rate": 8.159350738437198e-06, "loss": 2.1785, "step": 3118 }, { "epoch": 1.0630538513974097, "grad_norm": 21.833094537866675, "learning_rate": 8.157813457026961e-06, "loss": 2.5992, "step": 3119 }, { "epoch": 1.0633946830265848, "grad_norm": 15.263392397783726, "learning_rate": 8.156275678880095e-06, "loss": 2.9478, "step": 3120 }, { "epoch": 1.06373551465576, "grad_norm": 14.880288203539916, "learning_rate": 8.154737404238501e-06, "loss": 2.1008, "step": 3121 }, { "epoch": 1.0640763462849352, "grad_norm": 17.02765989582731, "learning_rate": 8.153198633344158e-06, "loss": 2.1832, "step": 3122 }, { "epoch": 1.0644171779141105, "grad_norm": 24.829570188693502, "learning_rate": 8.151659366439114e-06, "loss": 2.5098, "step": 3123 }, { "epoch": 1.0647580095432856, "grad_norm": 20.9863948508211, "learning_rate": 8.150119603765507e-06, "loss": 2.6556, "step": 3124 }, { "epoch": 1.065098841172461, "grad_norm": 15.840303815302837, "learning_rate": 8.148579345565545e-06, "loss": 2.2784, "step": 3125 }, { "epoch": 1.065439672801636, "grad_norm": 23.482187577863737, "learning_rate": 8.147038592081521e-06, "loss": 1.9724, "step": 3126 }, { "epoch": 1.065780504430811, "grad_norm": 11.319770090064555, "learning_rate": 8.145497343555796e-06, "loss": 2.0975, "step": 3127 }, { "epoch": 1.0661213360599864, "grad_norm": 18.983544105546937, "learning_rate": 8.14395560023082e-06, "loss": 2.447, "step": 3128 }, { "epoch": 1.0664621676891615, "grad_norm": 11.654782941688701, "learning_rate": 8.142413362349112e-06, "loss": 2.3275, "step": 3129 }, { "epoch": 1.0668029993183368, "grad_norm": 14.04893958573976, "learning_rate": 8.140870630153273e-06, "loss": 1.9325, "step": 3130 }, { "epoch": 1.0671438309475119, "grad_norm": 20.954136504741882, "learning_rate": 8.13932740388598e-06, "loss": 1.9308, "step": 3131 }, { "epoch": 1.0674846625766872, "grad_norm": 14.758378219582532, "learning_rate": 8.137783683789988e-06, "loss": 2.407, "step": 3132 }, { "epoch": 1.0678254942058623, "grad_norm": 19.200079817109145, "learning_rate": 8.136239470108134e-06, "loss": 2.1451, "step": 3133 }, { "epoch": 1.0681663258350376, "grad_norm": 15.804376397751135, "learning_rate": 8.134694763083326e-06, "loss": 2.079, "step": 3134 }, { "epoch": 1.0685071574642127, "grad_norm": 19.835563801842827, "learning_rate": 8.133149562958553e-06, "loss": 2.3474, "step": 3135 }, { "epoch": 1.068847989093388, "grad_norm": 35.89955463328483, "learning_rate": 8.131603869976883e-06, "loss": 2.1913, "step": 3136 }, { "epoch": 1.069188820722563, "grad_norm": 12.141430853569297, "learning_rate": 8.130057684381457e-06, "loss": 2.163, "step": 3137 }, { "epoch": 1.0695296523517381, "grad_norm": 17.015096675282965, "learning_rate": 8.128511006415496e-06, "loss": 2.4282, "step": 3138 }, { "epoch": 1.0698704839809134, "grad_norm": 17.38373503661309, "learning_rate": 8.126963836322301e-06, "loss": 2.3006, "step": 3139 }, { "epoch": 1.0702113156100885, "grad_norm": 12.707510282825181, "learning_rate": 8.125416174345246e-06, "loss": 2.0925, "step": 3140 }, { "epoch": 1.0705521472392638, "grad_norm": 20.923561258168093, "learning_rate": 8.123868020727787e-06, "loss": 2.4227, "step": 3141 }, { "epoch": 1.070892978868439, "grad_norm": 27.126265781637404, "learning_rate": 8.122319375713451e-06, "loss": 2.2754, "step": 3142 }, { "epoch": 1.0712338104976142, "grad_norm": 18.458042558699006, "learning_rate": 8.120770239545849e-06, "loss": 1.9235, "step": 3143 }, { "epoch": 1.0715746421267893, "grad_norm": 17.370463613680858, "learning_rate": 8.119220612468665e-06, "loss": 1.6792, "step": 3144 }, { "epoch": 1.0719154737559646, "grad_norm": 16.92558827196925, "learning_rate": 8.117670494725661e-06, "loss": 2.4309, "step": 3145 }, { "epoch": 1.0722563053851397, "grad_norm": 9.877990698772642, "learning_rate": 8.116119886560679e-06, "loss": 1.6414, "step": 3146 }, { "epoch": 1.072597137014315, "grad_norm": 16.614410430672475, "learning_rate": 8.114568788217633e-06, "loss": 1.997, "step": 3147 }, { "epoch": 1.0729379686434901, "grad_norm": 21.713234715385564, "learning_rate": 8.113017199940518e-06, "loss": 2.3511, "step": 3148 }, { "epoch": 1.0732788002726652, "grad_norm": 20.57344536376163, "learning_rate": 8.111465121973408e-06, "loss": 2.4847, "step": 3149 }, { "epoch": 1.0736196319018405, "grad_norm": 15.338302964548546, "learning_rate": 8.109912554560446e-06, "loss": 2.5154, "step": 3150 }, { "epoch": 1.0739604635310156, "grad_norm": 20.89790366932806, "learning_rate": 8.10835949794586e-06, "loss": 2.0382, "step": 3151 }, { "epoch": 1.074301295160191, "grad_norm": 13.169943436210849, "learning_rate": 8.10680595237395e-06, "loss": 1.8008, "step": 3152 }, { "epoch": 1.074642126789366, "grad_norm": 27.222254481362327, "learning_rate": 8.105251918089097e-06, "loss": 2.6793, "step": 3153 }, { "epoch": 1.0749829584185413, "grad_norm": 26.495432946335, "learning_rate": 8.103697395335755e-06, "loss": 2.3513, "step": 3154 }, { "epoch": 1.0753237900477164, "grad_norm": 18.598940071613598, "learning_rate": 8.10214238435846e-06, "loss": 2.2787, "step": 3155 }, { "epoch": 1.0756646216768917, "grad_norm": 25.750243343036917, "learning_rate": 8.100586885401817e-06, "loss": 2.7696, "step": 3156 }, { "epoch": 1.0760054533060668, "grad_norm": 15.517251216461291, "learning_rate": 8.099030898710516e-06, "loss": 2.1773, "step": 3157 }, { "epoch": 1.076346284935242, "grad_norm": 14.975727028667999, "learning_rate": 8.097474424529317e-06, "loss": 2.1912, "step": 3158 }, { "epoch": 1.0766871165644172, "grad_norm": 22.827158912443142, "learning_rate": 8.09591746310306e-06, "loss": 2.6409, "step": 3159 }, { "epoch": 1.0770279481935923, "grad_norm": 18.317163088871478, "learning_rate": 8.094360014676662e-06, "loss": 1.9059, "step": 3160 }, { "epoch": 1.0773687798227676, "grad_norm": 15.867549967970058, "learning_rate": 8.092802079495116e-06, "loss": 2.4134, "step": 3161 }, { "epoch": 1.0777096114519427, "grad_norm": 13.187792784716995, "learning_rate": 8.091243657803492e-06, "loss": 1.5458, "step": 3162 }, { "epoch": 1.078050443081118, "grad_norm": 53.62445693892496, "learning_rate": 8.089684749846936e-06, "loss": 1.9946, "step": 3163 }, { "epoch": 1.078391274710293, "grad_norm": 19.476277318605565, "learning_rate": 8.088125355870667e-06, "loss": 2.4941, "step": 3164 }, { "epoch": 1.0787321063394684, "grad_norm": 18.940823804116544, "learning_rate": 8.086565476119987e-06, "loss": 2.1838, "step": 3165 }, { "epoch": 1.0790729379686435, "grad_norm": 18.780629244048033, "learning_rate": 8.085005110840272e-06, "loss": 2.3449, "step": 3166 }, { "epoch": 1.0794137695978188, "grad_norm": 15.95047682505284, "learning_rate": 8.083444260276972e-06, "loss": 1.6398, "step": 3167 }, { "epoch": 1.0797546012269938, "grad_norm": 20.216216292697318, "learning_rate": 8.081882924675615e-06, "loss": 2.1688, "step": 3168 }, { "epoch": 1.0800954328561692, "grad_norm": 11.366547208295799, "learning_rate": 8.080321104281806e-06, "loss": 1.9828, "step": 3169 }, { "epoch": 1.0804362644853442, "grad_norm": 20.5170626760122, "learning_rate": 8.078758799341224e-06, "loss": 2.3839, "step": 3170 }, { "epoch": 1.0807770961145193, "grad_norm": 16.55415403907038, "learning_rate": 8.077196010099628e-06, "loss": 2.5871, "step": 3171 }, { "epoch": 1.0811179277436946, "grad_norm": 15.970253486454304, "learning_rate": 8.07563273680285e-06, "loss": 2.7828, "step": 3172 }, { "epoch": 1.0814587593728697, "grad_norm": 19.99269069641523, "learning_rate": 8.0740689796968e-06, "loss": 1.9075, "step": 3173 }, { "epoch": 1.081799591002045, "grad_norm": 17.696483931820286, "learning_rate": 8.072504739027461e-06, "loss": 2.3156, "step": 3174 }, { "epoch": 1.0821404226312201, "grad_norm": 36.2668693526958, "learning_rate": 8.070940015040895e-06, "loss": 2.4483, "step": 3175 }, { "epoch": 1.0824812542603954, "grad_norm": 17.135151907343378, "learning_rate": 8.069374807983243e-06, "loss": 2.0338, "step": 3176 }, { "epoch": 1.0828220858895705, "grad_norm": 20.436114926869138, "learning_rate": 8.067809118100712e-06, "loss": 2.3836, "step": 3177 }, { "epoch": 1.0831629175187458, "grad_norm": 17.590043716308852, "learning_rate": 8.066242945639596e-06, "loss": 2.3458, "step": 3178 }, { "epoch": 1.083503749147921, "grad_norm": 30.126377058694498, "learning_rate": 8.064676290846259e-06, "loss": 2.295, "step": 3179 }, { "epoch": 1.0838445807770962, "grad_norm": 13.084091317004107, "learning_rate": 8.063109153967141e-06, "loss": 2.1502, "step": 3180 }, { "epoch": 1.0841854124062713, "grad_norm": 11.181764811947094, "learning_rate": 8.061541535248759e-06, "loss": 2.1741, "step": 3181 }, { "epoch": 1.0845262440354464, "grad_norm": 13.436189627830931, "learning_rate": 8.059973434937707e-06, "loss": 2.3652, "step": 3182 }, { "epoch": 1.0848670756646217, "grad_norm": 16.622685731419896, "learning_rate": 8.058404853280652e-06, "loss": 1.891, "step": 3183 }, { "epoch": 1.0852079072937968, "grad_norm": 15.315723970787205, "learning_rate": 8.05683579052434e-06, "loss": 2.5497, "step": 3184 }, { "epoch": 1.085548738922972, "grad_norm": 14.416864373302854, "learning_rate": 8.055266246915589e-06, "loss": 1.9673, "step": 3185 }, { "epoch": 1.0858895705521472, "grad_norm": 12.671401902873622, "learning_rate": 8.053696222701294e-06, "loss": 1.8941, "step": 3186 }, { "epoch": 1.0862304021813225, "grad_norm": 15.50992439158444, "learning_rate": 8.052125718128428e-06, "loss": 2.6163, "step": 3187 }, { "epoch": 1.0865712338104976, "grad_norm": 13.587385570349172, "learning_rate": 8.050554733444036e-06, "loss": 2.0261, "step": 3188 }, { "epoch": 1.086912065439673, "grad_norm": 18.53938444912566, "learning_rate": 8.048983268895241e-06, "loss": 1.9923, "step": 3189 }, { "epoch": 1.087252897068848, "grad_norm": 21.9480655032452, "learning_rate": 8.047411324729242e-06, "loss": 1.8066, "step": 3190 }, { "epoch": 1.0875937286980233, "grad_norm": 22.938067352679735, "learning_rate": 8.04583890119331e-06, "loss": 2.207, "step": 3191 }, { "epoch": 1.0879345603271984, "grad_norm": 14.514017640209069, "learning_rate": 8.044265998534793e-06, "loss": 2.1644, "step": 3192 }, { "epoch": 1.0882753919563735, "grad_norm": 17.07840466492976, "learning_rate": 8.042692617001118e-06, "loss": 1.9882, "step": 3193 }, { "epoch": 1.0886162235855488, "grad_norm": 13.33423410121783, "learning_rate": 8.04111875683978e-06, "loss": 1.6925, "step": 3194 }, { "epoch": 1.0889570552147239, "grad_norm": 19.510102516520295, "learning_rate": 8.039544418298358e-06, "loss": 2.3903, "step": 3195 }, { "epoch": 1.0892978868438992, "grad_norm": 22.088352787750637, "learning_rate": 8.037969601624495e-06, "loss": 2.1696, "step": 3196 }, { "epoch": 1.0896387184730743, "grad_norm": 18.47955253328768, "learning_rate": 8.036394307065923e-06, "loss": 2.8124, "step": 3197 }, { "epoch": 1.0899795501022496, "grad_norm": 15.0813233130154, "learning_rate": 8.03481853487044e-06, "loss": 2.3649, "step": 3198 }, { "epoch": 1.0903203817314246, "grad_norm": 14.962493066940002, "learning_rate": 8.033242285285919e-06, "loss": 2.6459, "step": 3199 }, { "epoch": 1.0906612133606, "grad_norm": 16.415201521999474, "learning_rate": 8.031665558560313e-06, "loss": 2.711, "step": 3200 }, { "epoch": 1.091002044989775, "grad_norm": 14.39468983110036, "learning_rate": 8.030088354941645e-06, "loss": 1.9284, "step": 3201 }, { "epoch": 1.0913428766189504, "grad_norm": 16.84682654519699, "learning_rate": 8.028510674678018e-06, "loss": 1.8573, "step": 3202 }, { "epoch": 1.0916837082481254, "grad_norm": 16.049978740945726, "learning_rate": 8.026932518017605e-06, "loss": 2.7321, "step": 3203 }, { "epoch": 1.0920245398773005, "grad_norm": 14.954382030364723, "learning_rate": 8.025353885208657e-06, "loss": 1.7487, "step": 3204 }, { "epoch": 1.0923653715064758, "grad_norm": 16.13165540799585, "learning_rate": 8.023774776499502e-06, "loss": 2.2835, "step": 3205 }, { "epoch": 1.092706203135651, "grad_norm": 11.592206059275936, "learning_rate": 8.022195192138535e-06, "loss": 1.978, "step": 3206 }, { "epoch": 1.0930470347648262, "grad_norm": 14.525045083009802, "learning_rate": 8.020615132374236e-06, "loss": 2.642, "step": 3207 }, { "epoch": 1.0933878663940013, "grad_norm": 15.609060441606163, "learning_rate": 8.01903459745515e-06, "loss": 1.8288, "step": 3208 }, { "epoch": 1.0937286980231766, "grad_norm": 17.76783627876322, "learning_rate": 8.017453587629906e-06, "loss": 2.2987, "step": 3209 }, { "epoch": 1.0940695296523517, "grad_norm": 15.39128602718989, "learning_rate": 8.015872103147199e-06, "loss": 2.4194, "step": 3210 }, { "epoch": 1.094410361281527, "grad_norm": 14.804288407980847, "learning_rate": 8.014290144255806e-06, "loss": 2.4688, "step": 3211 }, { "epoch": 1.094751192910702, "grad_norm": 13.584996030905666, "learning_rate": 8.012707711204573e-06, "loss": 2.3963, "step": 3212 }, { "epoch": 1.0950920245398774, "grad_norm": 14.198138519229806, "learning_rate": 8.011124804242424e-06, "loss": 2.0981, "step": 3213 }, { "epoch": 1.0954328561690525, "grad_norm": 12.815754882040364, "learning_rate": 8.009541423618359e-06, "loss": 1.8702, "step": 3214 }, { "epoch": 1.0957736877982276, "grad_norm": 24.274237888834897, "learning_rate": 8.007957569581447e-06, "loss": 1.4268, "step": 3215 }, { "epoch": 1.096114519427403, "grad_norm": 15.883622539814077, "learning_rate": 8.006373242380834e-06, "loss": 2.2147, "step": 3216 }, { "epoch": 1.096455351056578, "grad_norm": 11.89931443888466, "learning_rate": 8.00478844226574e-06, "loss": 1.6344, "step": 3217 }, { "epoch": 1.0967961826857533, "grad_norm": 13.88622674461722, "learning_rate": 8.003203169485467e-06, "loss": 1.8146, "step": 3218 }, { "epoch": 1.0971370143149284, "grad_norm": 15.156307949321688, "learning_rate": 8.001617424289379e-06, "loss": 2.2974, "step": 3219 }, { "epoch": 1.0974778459441037, "grad_norm": 14.258564864768744, "learning_rate": 8.000031206926921e-06, "loss": 2.2082, "step": 3220 }, { "epoch": 1.0978186775732788, "grad_norm": 14.784228365521683, "learning_rate": 7.998444517647613e-06, "loss": 1.9239, "step": 3221 }, { "epoch": 1.098159509202454, "grad_norm": 17.266649501201943, "learning_rate": 7.996857356701045e-06, "loss": 2.2911, "step": 3222 }, { "epoch": 1.0985003408316292, "grad_norm": 14.980744064622227, "learning_rate": 7.995269724336886e-06, "loss": 2.3697, "step": 3223 }, { "epoch": 1.0988411724608043, "grad_norm": 14.999418377518635, "learning_rate": 7.993681620804874e-06, "loss": 1.7488, "step": 3224 }, { "epoch": 1.0991820040899796, "grad_norm": 15.92277697198263, "learning_rate": 7.992093046354827e-06, "loss": 2.509, "step": 3225 }, { "epoch": 1.0995228357191547, "grad_norm": 18.557292311315123, "learning_rate": 7.990504001236633e-06, "loss": 2.3265, "step": 3226 }, { "epoch": 1.09986366734833, "grad_norm": 18.608192194420543, "learning_rate": 7.988914485700255e-06, "loss": 1.9073, "step": 3227 }, { "epoch": 1.100204498977505, "grad_norm": 14.105403110058363, "learning_rate": 7.98732449999573e-06, "loss": 2.1848, "step": 3228 }, { "epoch": 1.1005453306066804, "grad_norm": 24.20272429202263, "learning_rate": 7.98573404437317e-06, "loss": 2.4388, "step": 3229 }, { "epoch": 1.1008861622358554, "grad_norm": 17.68374320712257, "learning_rate": 7.984143119082759e-06, "loss": 1.5977, "step": 3230 }, { "epoch": 1.1012269938650308, "grad_norm": 19.960332774244147, "learning_rate": 7.982551724374755e-06, "loss": 2.1678, "step": 3231 }, { "epoch": 1.1015678254942058, "grad_norm": 27.83798927372564, "learning_rate": 7.980959860499494e-06, "loss": 2.3072, "step": 3232 }, { "epoch": 1.1019086571233812, "grad_norm": 14.431823204692204, "learning_rate": 7.979367527707379e-06, "loss": 2.3311, "step": 3233 }, { "epoch": 1.1022494887525562, "grad_norm": 22.4567782738246, "learning_rate": 7.97777472624889e-06, "loss": 2.3087, "step": 3234 }, { "epoch": 1.1025903203817313, "grad_norm": 13.615487994904296, "learning_rate": 7.976181456374587e-06, "loss": 2.1664, "step": 3235 }, { "epoch": 1.1029311520109066, "grad_norm": 22.971670825941, "learning_rate": 7.974587718335091e-06, "loss": 2.8007, "step": 3236 }, { "epoch": 1.1032719836400817, "grad_norm": 17.044162581632712, "learning_rate": 7.972993512381105e-06, "loss": 2.074, "step": 3237 }, { "epoch": 1.103612815269257, "grad_norm": 16.396271642580803, "learning_rate": 7.971398838763406e-06, "loss": 1.9353, "step": 3238 }, { "epoch": 1.1039536468984321, "grad_norm": 17.516829745061383, "learning_rate": 7.96980369773284e-06, "loss": 2.3333, "step": 3239 }, { "epoch": 1.1042944785276074, "grad_norm": 16.752776902730993, "learning_rate": 7.968208089540332e-06, "loss": 2.5672, "step": 3240 }, { "epoch": 1.1046353101567825, "grad_norm": 14.80584551306591, "learning_rate": 7.966612014436876e-06, "loss": 2.2669, "step": 3241 }, { "epoch": 1.1049761417859578, "grad_norm": 17.883990157920547, "learning_rate": 7.965015472673538e-06, "loss": 2.1144, "step": 3242 }, { "epoch": 1.105316973415133, "grad_norm": 15.038055043054378, "learning_rate": 7.963418464501464e-06, "loss": 2.7557, "step": 3243 }, { "epoch": 1.105657805044308, "grad_norm": 18.394911270103695, "learning_rate": 7.961820990171871e-06, "loss": 2.9488, "step": 3244 }, { "epoch": 1.1059986366734833, "grad_norm": 14.051256310531572, "learning_rate": 7.960223049936045e-06, "loss": 1.8045, "step": 3245 }, { "epoch": 1.1063394683026584, "grad_norm": 15.271525743524034, "learning_rate": 7.958624644045348e-06, "loss": 2.4993, "step": 3246 }, { "epoch": 1.1066802999318337, "grad_norm": 15.82914739666783, "learning_rate": 7.95702577275122e-06, "loss": 1.9199, "step": 3247 }, { "epoch": 1.1070211315610088, "grad_norm": 20.853518637008605, "learning_rate": 7.955426436305163e-06, "loss": 2.4308, "step": 3248 }, { "epoch": 1.107361963190184, "grad_norm": 22.694153393964385, "learning_rate": 7.953826634958763e-06, "loss": 2.2159, "step": 3249 }, { "epoch": 1.1077027948193592, "grad_norm": 18.463564680008517, "learning_rate": 7.952226368963677e-06, "loss": 2.1331, "step": 3250 }, { "epoch": 1.1080436264485345, "grad_norm": 17.732599113691062, "learning_rate": 7.95062563857163e-06, "loss": 2.3031, "step": 3251 }, { "epoch": 1.1083844580777096, "grad_norm": 21.87268528872712, "learning_rate": 7.949024444034424e-06, "loss": 1.786, "step": 3252 }, { "epoch": 1.1087252897068849, "grad_norm": 23.539314601127803, "learning_rate": 7.947422785603934e-06, "loss": 2.7283, "step": 3253 }, { "epoch": 1.10906612133606, "grad_norm": 12.940591684480484, "learning_rate": 7.945820663532105e-06, "loss": 1.9, "step": 3254 }, { "epoch": 1.109406952965235, "grad_norm": 17.02730000764426, "learning_rate": 7.944218078070963e-06, "loss": 2.8901, "step": 3255 }, { "epoch": 1.1097477845944104, "grad_norm": 17.621491587086457, "learning_rate": 7.942615029472593e-06, "loss": 1.6036, "step": 3256 }, { "epoch": 1.1100886162235855, "grad_norm": 12.794252726550406, "learning_rate": 7.941011517989166e-06, "loss": 1.6849, "step": 3257 }, { "epoch": 1.1104294478527608, "grad_norm": 20.575298570531164, "learning_rate": 7.939407543872921e-06, "loss": 2.5317, "step": 3258 }, { "epoch": 1.1107702794819359, "grad_norm": 15.648678887510131, "learning_rate": 7.937803107376168e-06, "loss": 2.748, "step": 3259 }, { "epoch": 1.1111111111111112, "grad_norm": 13.95351087607938, "learning_rate": 7.936198208751292e-06, "loss": 1.994, "step": 3260 }, { "epoch": 1.1114519427402862, "grad_norm": 14.322990342698946, "learning_rate": 7.934592848250747e-06, "loss": 2.059, "step": 3261 }, { "epoch": 1.1117927743694616, "grad_norm": 10.203945081638588, "learning_rate": 7.932987026127066e-06, "loss": 1.7368, "step": 3262 }, { "epoch": 1.1121336059986366, "grad_norm": 21.683401523568225, "learning_rate": 7.931380742632852e-06, "loss": 2.1464, "step": 3263 }, { "epoch": 1.112474437627812, "grad_norm": 20.25418430358129, "learning_rate": 7.929773998020776e-06, "loss": 2.2328, "step": 3264 }, { "epoch": 1.112815269256987, "grad_norm": 16.26802081380709, "learning_rate": 7.928166792543589e-06, "loss": 2.6076, "step": 3265 }, { "epoch": 1.1131561008861621, "grad_norm": 18.504566926869845, "learning_rate": 7.926559126454109e-06, "loss": 2.988, "step": 3266 }, { "epoch": 1.1134969325153374, "grad_norm": 18.93975025957297, "learning_rate": 7.924951000005228e-06, "loss": 1.9905, "step": 3267 }, { "epoch": 1.1138377641445125, "grad_norm": 17.185824360338753, "learning_rate": 7.92334241344991e-06, "loss": 2.6204, "step": 3268 }, { "epoch": 1.1141785957736878, "grad_norm": 19.216871543627892, "learning_rate": 7.921733367041195e-06, "loss": 2.3781, "step": 3269 }, { "epoch": 1.114519427402863, "grad_norm": 18.667527287921267, "learning_rate": 7.92012386103219e-06, "loss": 2.2127, "step": 3270 }, { "epoch": 1.1148602590320382, "grad_norm": 21.872414346059074, "learning_rate": 7.918513895676077e-06, "loss": 2.4983, "step": 3271 }, { "epoch": 1.1152010906612133, "grad_norm": 18.13650247120749, "learning_rate": 7.91690347122611e-06, "loss": 2.1931, "step": 3272 }, { "epoch": 1.1155419222903886, "grad_norm": 18.133826628312377, "learning_rate": 7.915292587935615e-06, "loss": 2.0104, "step": 3273 }, { "epoch": 1.1158827539195637, "grad_norm": 24.25160585149576, "learning_rate": 7.913681246057992e-06, "loss": 2.2624, "step": 3274 }, { "epoch": 1.116223585548739, "grad_norm": 19.143270950434935, "learning_rate": 7.91206944584671e-06, "loss": 2.4288, "step": 3275 }, { "epoch": 1.116564417177914, "grad_norm": 23.63434836978218, "learning_rate": 7.910457187555311e-06, "loss": 2.78, "step": 3276 }, { "epoch": 1.1169052488070892, "grad_norm": 11.9739410906923, "learning_rate": 7.908844471437413e-06, "loss": 2.0488, "step": 3277 }, { "epoch": 1.1172460804362645, "grad_norm": 19.107530415917232, "learning_rate": 7.907231297746698e-06, "loss": 2.2045, "step": 3278 }, { "epoch": 1.1175869120654396, "grad_norm": 25.106956024708573, "learning_rate": 7.905617666736927e-06, "loss": 2.6024, "step": 3279 }, { "epoch": 1.117927743694615, "grad_norm": 21.84524489752337, "learning_rate": 7.904003578661933e-06, "loss": 2.3441, "step": 3280 }, { "epoch": 1.11826857532379, "grad_norm": 19.531750962038622, "learning_rate": 7.902389033775612e-06, "loss": 2.8603, "step": 3281 }, { "epoch": 1.1186094069529653, "grad_norm": 15.334834321326309, "learning_rate": 7.900774032331943e-06, "loss": 2.3338, "step": 3282 }, { "epoch": 1.1189502385821404, "grad_norm": 18.522699254017493, "learning_rate": 7.899158574584973e-06, "loss": 2.7134, "step": 3283 }, { "epoch": 1.1192910702113157, "grad_norm": 16.16630811759295, "learning_rate": 7.897542660788818e-06, "loss": 2.1765, "step": 3284 }, { "epoch": 1.1196319018404908, "grad_norm": 16.705871435697475, "learning_rate": 7.895926291197667e-06, "loss": 1.7722, "step": 3285 }, { "epoch": 1.119972733469666, "grad_norm": 16.645465159156743, "learning_rate": 7.894309466065784e-06, "loss": 1.4785, "step": 3286 }, { "epoch": 1.1203135650988412, "grad_norm": 15.593261771515197, "learning_rate": 7.892692185647499e-06, "loss": 2.4151, "step": 3287 }, { "epoch": 1.1206543967280163, "grad_norm": 10.427086529340585, "learning_rate": 7.891074450197219e-06, "loss": 1.9486, "step": 3288 }, { "epoch": 1.1209952283571916, "grad_norm": 12.052091473818175, "learning_rate": 7.889456259969418e-06, "loss": 1.8618, "step": 3289 }, { "epoch": 1.1213360599863667, "grad_norm": 15.107123953188882, "learning_rate": 7.887837615218648e-06, "loss": 2.6977, "step": 3290 }, { "epoch": 1.121676891615542, "grad_norm": 16.451701529990903, "learning_rate": 7.886218516199522e-06, "loss": 1.8724, "step": 3291 }, { "epoch": 1.122017723244717, "grad_norm": 21.820672552648134, "learning_rate": 7.884598963166736e-06, "loss": 2.0172, "step": 3292 }, { "epoch": 1.1223585548738924, "grad_norm": 18.33351244253047, "learning_rate": 7.882978956375048e-06, "loss": 2.9295, "step": 3293 }, { "epoch": 1.1226993865030674, "grad_norm": 15.533486967515652, "learning_rate": 7.881358496079297e-06, "loss": 2.7047, "step": 3294 }, { "epoch": 1.1230402181322428, "grad_norm": 18.96750096306392, "learning_rate": 7.879737582534382e-06, "loss": 2.2042, "step": 3295 }, { "epoch": 1.1233810497614178, "grad_norm": 19.035064047222846, "learning_rate": 7.878116215995282e-06, "loss": 2.3091, "step": 3296 }, { "epoch": 1.1237218813905931, "grad_norm": 20.420009180448417, "learning_rate": 7.876494396717044e-06, "loss": 2.5589, "step": 3297 }, { "epoch": 1.1240627130197682, "grad_norm": 16.209909886959682, "learning_rate": 7.874872124954788e-06, "loss": 2.6532, "step": 3298 }, { "epoch": 1.1244035446489433, "grad_norm": 16.285719033829782, "learning_rate": 7.873249400963701e-06, "loss": 2.0218, "step": 3299 }, { "epoch": 1.1247443762781186, "grad_norm": 18.799979813347477, "learning_rate": 7.871626224999048e-06, "loss": 1.7604, "step": 3300 }, { "epoch": 1.1250852079072937, "grad_norm": 14.377028544178179, "learning_rate": 7.870002597316155e-06, "loss": 2.3859, "step": 3301 }, { "epoch": 1.125426039536469, "grad_norm": 16.509835587620174, "learning_rate": 7.86837851817043e-06, "loss": 2.3378, "step": 3302 }, { "epoch": 1.1257668711656441, "grad_norm": 11.50260809591511, "learning_rate": 7.866753987817347e-06, "loss": 1.9984, "step": 3303 }, { "epoch": 1.1261077027948194, "grad_norm": 17.701129683771814, "learning_rate": 7.865129006512448e-06, "loss": 2.2657, "step": 3304 }, { "epoch": 1.1264485344239945, "grad_norm": 21.116108566263353, "learning_rate": 7.863503574511354e-06, "loss": 2.2838, "step": 3305 }, { "epoch": 1.1267893660531698, "grad_norm": 23.30332735577259, "learning_rate": 7.861877692069746e-06, "loss": 2.8452, "step": 3306 }, { "epoch": 1.127130197682345, "grad_norm": 17.81914103254937, "learning_rate": 7.860251359443386e-06, "loss": 2.0968, "step": 3307 }, { "epoch": 1.1274710293115202, "grad_norm": 25.252530302535245, "learning_rate": 7.8586245768881e-06, "loss": 2.1439, "step": 3308 }, { "epoch": 1.1278118609406953, "grad_norm": 15.226571702762017, "learning_rate": 7.856997344659788e-06, "loss": 2.6405, "step": 3309 }, { "epoch": 1.1281526925698704, "grad_norm": 21.776894887482452, "learning_rate": 7.85536966301442e-06, "loss": 2.1528, "step": 3310 }, { "epoch": 1.1284935241990457, "grad_norm": 19.194700210366815, "learning_rate": 7.85374153220804e-06, "loss": 2.4896, "step": 3311 }, { "epoch": 1.1288343558282208, "grad_norm": 12.180401821803889, "learning_rate": 7.852112952496755e-06, "loss": 2.1481, "step": 3312 }, { "epoch": 1.129175187457396, "grad_norm": 14.109818023776713, "learning_rate": 7.850483924136751e-06, "loss": 2.0975, "step": 3313 }, { "epoch": 1.1295160190865712, "grad_norm": 16.473100559633057, "learning_rate": 7.848854447384278e-06, "loss": 2.2334, "step": 3314 }, { "epoch": 1.1298568507157465, "grad_norm": 15.058589659764682, "learning_rate": 7.847224522495657e-06, "loss": 2.2939, "step": 3315 }, { "epoch": 1.1301976823449216, "grad_norm": 14.06300254435552, "learning_rate": 7.845594149727288e-06, "loss": 1.9568, "step": 3316 }, { "epoch": 1.1305385139740969, "grad_norm": 20.44809883669552, "learning_rate": 7.843963329335631e-06, "loss": 2.6436, "step": 3317 }, { "epoch": 1.130879345603272, "grad_norm": 19.11521334290607, "learning_rate": 7.84233206157722e-06, "loss": 2.1564, "step": 3318 }, { "epoch": 1.1312201772324473, "grad_norm": 14.530084262792466, "learning_rate": 7.84070034670866e-06, "loss": 1.8501, "step": 3319 }, { "epoch": 1.1315610088616224, "grad_norm": 14.187532402727902, "learning_rate": 7.839068184986629e-06, "loss": 1.9951, "step": 3320 }, { "epoch": 1.1319018404907975, "grad_norm": 13.612620243325834, "learning_rate": 7.837435576667872e-06, "loss": 2.0879, "step": 3321 }, { "epoch": 1.1322426721199728, "grad_norm": 18.229536225185527, "learning_rate": 7.835802522009201e-06, "loss": 2.2611, "step": 3322 }, { "epoch": 1.1325835037491478, "grad_norm": 16.122363331723726, "learning_rate": 7.834169021267505e-06, "loss": 1.9306, "step": 3323 }, { "epoch": 1.1329243353783232, "grad_norm": 21.18358213001543, "learning_rate": 7.832535074699741e-06, "loss": 2.6036, "step": 3324 }, { "epoch": 1.1332651670074982, "grad_norm": 17.554908609211026, "learning_rate": 7.830900682562933e-06, "loss": 2.2412, "step": 3325 }, { "epoch": 1.1336059986366736, "grad_norm": 13.123615864731933, "learning_rate": 7.82926584511418e-06, "loss": 1.7771, "step": 3326 }, { "epoch": 1.1339468302658486, "grad_norm": 17.11383097258368, "learning_rate": 7.827630562610647e-06, "loss": 2.7906, "step": 3327 }, { "epoch": 1.134287661895024, "grad_norm": 23.490094613840775, "learning_rate": 7.825994835309569e-06, "loss": 2.0116, "step": 3328 }, { "epoch": 1.134628493524199, "grad_norm": 14.725307100363581, "learning_rate": 7.824358663468254e-06, "loss": 1.6223, "step": 3329 }, { "epoch": 1.1349693251533743, "grad_norm": 24.104110069847007, "learning_rate": 7.822722047344077e-06, "loss": 1.8455, "step": 3330 }, { "epoch": 1.1353101567825494, "grad_norm": 29.83963352904978, "learning_rate": 7.821084987194487e-06, "loss": 2.1759, "step": 3331 }, { "epoch": 1.1356509884117245, "grad_norm": 35.9832726150975, "learning_rate": 7.819447483276998e-06, "loss": 2.2729, "step": 3332 }, { "epoch": 1.1359918200408998, "grad_norm": 15.07591925243444, "learning_rate": 7.817809535849197e-06, "loss": 2.2584, "step": 3333 }, { "epoch": 1.136332651670075, "grad_norm": 18.970961587440705, "learning_rate": 7.816171145168739e-06, "loss": 1.8775, "step": 3334 }, { "epoch": 1.1366734832992502, "grad_norm": 23.194811955429742, "learning_rate": 7.814532311493348e-06, "loss": 2.1792, "step": 3335 }, { "epoch": 1.1370143149284253, "grad_norm": 13.860033359785337, "learning_rate": 7.81289303508082e-06, "loss": 2.2617, "step": 3336 }, { "epoch": 1.1373551465576006, "grad_norm": 17.84631157666731, "learning_rate": 7.811253316189023e-06, "loss": 1.9161, "step": 3337 }, { "epoch": 1.1376959781867757, "grad_norm": 19.20181840671097, "learning_rate": 7.809613155075886e-06, "loss": 2.1374, "step": 3338 }, { "epoch": 1.138036809815951, "grad_norm": 27.240052937782078, "learning_rate": 7.807972551999414e-06, "loss": 2.7086, "step": 3339 }, { "epoch": 1.138377641445126, "grad_norm": 22.05717611502453, "learning_rate": 7.806331507217684e-06, "loss": 2.2785, "step": 3340 }, { "epoch": 1.1387184730743014, "grad_norm": 24.31840941539864, "learning_rate": 7.804690020988837e-06, "loss": 2.1519, "step": 3341 }, { "epoch": 1.1390593047034765, "grad_norm": 16.40075977500821, "learning_rate": 7.803048093571084e-06, "loss": 2.0425, "step": 3342 }, { "epoch": 1.1394001363326516, "grad_norm": 27.57541687635015, "learning_rate": 7.801405725222708e-06, "loss": 2.6323, "step": 3343 }, { "epoch": 1.139740967961827, "grad_norm": 22.142334980760744, "learning_rate": 7.799762916202058e-06, "loss": 2.8017, "step": 3344 }, { "epoch": 1.140081799591002, "grad_norm": 11.935185376071686, "learning_rate": 7.798119666767559e-06, "loss": 2.4363, "step": 3345 }, { "epoch": 1.1404226312201773, "grad_norm": 18.36382555861202, "learning_rate": 7.796475977177695e-06, "loss": 2.2847, "step": 3346 }, { "epoch": 1.1407634628493524, "grad_norm": 17.42969349311105, "learning_rate": 7.794831847691028e-06, "loss": 1.9215, "step": 3347 }, { "epoch": 1.1411042944785277, "grad_norm": 21.699400493683267, "learning_rate": 7.793187278566186e-06, "loss": 2.852, "step": 3348 }, { "epoch": 1.1414451261077028, "grad_norm": 17.75710302799252, "learning_rate": 7.791542270061867e-06, "loss": 2.4463, "step": 3349 }, { "epoch": 1.141785957736878, "grad_norm": 19.760825981433086, "learning_rate": 7.789896822436835e-06, "loss": 2.0645, "step": 3350 }, { "epoch": 1.1421267893660532, "grad_norm": 14.071506526439624, "learning_rate": 7.788250935949929e-06, "loss": 2.4669, "step": 3351 }, { "epoch": 1.1424676209952285, "grad_norm": 13.745633775600728, "learning_rate": 7.78660461086005e-06, "loss": 2.043, "step": 3352 }, { "epoch": 1.1428084526244036, "grad_norm": 14.615078090108394, "learning_rate": 7.784957847426174e-06, "loss": 2.7033, "step": 3353 }, { "epoch": 1.1431492842535786, "grad_norm": 17.545058052844286, "learning_rate": 7.78331064590734e-06, "loss": 1.9087, "step": 3354 }, { "epoch": 1.143490115882754, "grad_norm": 16.32023807855148, "learning_rate": 7.781663006562663e-06, "loss": 2.5155, "step": 3355 }, { "epoch": 1.143830947511929, "grad_norm": 18.114867547116596, "learning_rate": 7.780014929651323e-06, "loss": 2.267, "step": 3356 }, { "epoch": 1.1441717791411044, "grad_norm": 19.658994992005546, "learning_rate": 7.778366415432566e-06, "loss": 2.7726, "step": 3357 }, { "epoch": 1.1445126107702794, "grad_norm": 17.985881600089815, "learning_rate": 7.776717464165713e-06, "loss": 2.9494, "step": 3358 }, { "epoch": 1.1448534423994547, "grad_norm": 15.587798550104226, "learning_rate": 7.77506807611015e-06, "loss": 2.0557, "step": 3359 }, { "epoch": 1.1451942740286298, "grad_norm": 16.207464430475465, "learning_rate": 7.773418251525331e-06, "loss": 2.7022, "step": 3360 }, { "epoch": 1.145535105657805, "grad_norm": 16.389940244758403, "learning_rate": 7.77176799067078e-06, "loss": 1.8798, "step": 3361 }, { "epoch": 1.1458759372869802, "grad_norm": 20.90196986928099, "learning_rate": 7.77011729380609e-06, "loss": 2.5315, "step": 3362 }, { "epoch": 1.1462167689161555, "grad_norm": 14.491794805676072, "learning_rate": 7.768466161190924e-06, "loss": 2.3364, "step": 3363 }, { "epoch": 1.1465576005453306, "grad_norm": 13.75439902019629, "learning_rate": 7.76681459308501e-06, "loss": 2.1735, "step": 3364 }, { "epoch": 1.1468984321745057, "grad_norm": 23.875586320181622, "learning_rate": 7.765162589748146e-06, "loss": 2.4275, "step": 3365 }, { "epoch": 1.147239263803681, "grad_norm": 13.669817000461418, "learning_rate": 7.7635101514402e-06, "loss": 1.7657, "step": 3366 }, { "epoch": 1.147580095432856, "grad_norm": 18.458613062388867, "learning_rate": 7.761857278421104e-06, "loss": 2.4672, "step": 3367 }, { "epoch": 1.1479209270620314, "grad_norm": 19.486741022578194, "learning_rate": 7.760203970950866e-06, "loss": 2.3249, "step": 3368 }, { "epoch": 1.1482617586912065, "grad_norm": 11.250491843122916, "learning_rate": 7.758550229289554e-06, "loss": 2.0424, "step": 3369 }, { "epoch": 1.1486025903203818, "grad_norm": 15.186805206006467, "learning_rate": 7.75689605369731e-06, "loss": 2.3832, "step": 3370 }, { "epoch": 1.148943421949557, "grad_norm": 15.462041510349598, "learning_rate": 7.755241444434346e-06, "loss": 2.1058, "step": 3371 }, { "epoch": 1.149284253578732, "grad_norm": 12.484444103319479, "learning_rate": 7.75358640176093e-06, "loss": 2.3678, "step": 3372 }, { "epoch": 1.1496250852079073, "grad_norm": 14.955214170161877, "learning_rate": 7.751930925937414e-06, "loss": 2.095, "step": 3373 }, { "epoch": 1.1499659168370826, "grad_norm": 9.249919633147934, "learning_rate": 7.750275017224208e-06, "loss": 1.8462, "step": 3374 }, { "epoch": 1.1503067484662577, "grad_norm": 17.43559972989925, "learning_rate": 7.748618675881793e-06, "loss": 2.2142, "step": 3375 }, { "epoch": 1.1506475800954328, "grad_norm": 32.791435441444136, "learning_rate": 7.74696190217072e-06, "loss": 2.7314, "step": 3376 }, { "epoch": 1.150988411724608, "grad_norm": 18.809398036581218, "learning_rate": 7.745304696351603e-06, "loss": 2.4865, "step": 3377 }, { "epoch": 1.1513292433537832, "grad_norm": 13.661697032719012, "learning_rate": 7.74364705868513e-06, "loss": 1.8984, "step": 3378 }, { "epoch": 1.1516700749829585, "grad_norm": 23.15591430817826, "learning_rate": 7.741988989432052e-06, "loss": 2.4242, "step": 3379 }, { "epoch": 1.1520109066121336, "grad_norm": 15.32816807623184, "learning_rate": 7.740330488853192e-06, "loss": 2.8042, "step": 3380 }, { "epoch": 1.1523517382413089, "grad_norm": 15.753941385475837, "learning_rate": 7.738671557209437e-06, "loss": 1.7111, "step": 3381 }, { "epoch": 1.152692569870484, "grad_norm": 19.632660391574486, "learning_rate": 7.737012194761743e-06, "loss": 2.4625, "step": 3382 }, { "epoch": 1.153033401499659, "grad_norm": 28.10078405182242, "learning_rate": 7.735352401771136e-06, "loss": 1.9864, "step": 3383 }, { "epoch": 1.1533742331288344, "grad_norm": 15.665789735392659, "learning_rate": 7.733692178498705e-06, "loss": 2.2073, "step": 3384 }, { "epoch": 1.1537150647580097, "grad_norm": 14.621939992101218, "learning_rate": 7.732031525205616e-06, "loss": 2.7708, "step": 3385 }, { "epoch": 1.1540558963871848, "grad_norm": 13.009102069699392, "learning_rate": 7.730370442153092e-06, "loss": 1.8405, "step": 3386 }, { "epoch": 1.1543967280163598, "grad_norm": 12.992180995514437, "learning_rate": 7.728708929602428e-06, "loss": 1.8741, "step": 3387 }, { "epoch": 1.1547375596455351, "grad_norm": 12.947531557969745, "learning_rate": 7.727046987814986e-06, "loss": 2.2434, "step": 3388 }, { "epoch": 1.1550783912747102, "grad_norm": 11.832831115328096, "learning_rate": 7.725384617052198e-06, "loss": 2.3875, "step": 3389 }, { "epoch": 1.1554192229038855, "grad_norm": 14.865087761302346, "learning_rate": 7.72372181757556e-06, "loss": 1.9367, "step": 3390 }, { "epoch": 1.1557600545330606, "grad_norm": 17.9234545085971, "learning_rate": 7.722058589646638e-06, "loss": 1.92, "step": 3391 }, { "epoch": 1.156100886162236, "grad_norm": 12.283253763437534, "learning_rate": 7.720394933527065e-06, "loss": 2.1613, "step": 3392 }, { "epoch": 1.156441717791411, "grad_norm": 15.005772546890071, "learning_rate": 7.718730849478538e-06, "loss": 2.1218, "step": 3393 }, { "epoch": 1.1567825494205861, "grad_norm": 29.68738866772878, "learning_rate": 7.717066337762826e-06, "loss": 2.1741, "step": 3394 }, { "epoch": 1.1571233810497614, "grad_norm": 10.25110259695917, "learning_rate": 7.715401398641764e-06, "loss": 1.8942, "step": 3395 }, { "epoch": 1.1574642126789365, "grad_norm": 14.954398557298907, "learning_rate": 7.713736032377252e-06, "loss": 2.263, "step": 3396 }, { "epoch": 1.1578050443081118, "grad_norm": 29.226301537696163, "learning_rate": 7.712070239231259e-06, "loss": 2.9588, "step": 3397 }, { "epoch": 1.158145875937287, "grad_norm": 19.49017209369269, "learning_rate": 7.710404019465821e-06, "loss": 1.9383, "step": 3398 }, { "epoch": 1.1584867075664622, "grad_norm": 14.755534164061041, "learning_rate": 7.708737373343042e-06, "loss": 2.5223, "step": 3399 }, { "epoch": 1.1588275391956373, "grad_norm": 18.325033933927585, "learning_rate": 7.70707030112509e-06, "loss": 2.0835, "step": 3400 }, { "epoch": 1.1591683708248126, "grad_norm": 13.855776675171917, "learning_rate": 7.705402803074205e-06, "loss": 2.2114, "step": 3401 }, { "epoch": 1.1595092024539877, "grad_norm": 16.98276213234163, "learning_rate": 7.70373487945269e-06, "loss": 1.9083, "step": 3402 }, { "epoch": 1.159850034083163, "grad_norm": 20.28503275423625, "learning_rate": 7.702066530522914e-06, "loss": 2.3952, "step": 3403 }, { "epoch": 1.160190865712338, "grad_norm": 18.43051255013586, "learning_rate": 7.700397756547316e-06, "loss": 2.4471, "step": 3404 }, { "epoch": 1.1605316973415132, "grad_norm": 14.837971430236047, "learning_rate": 7.698728557788404e-06, "loss": 1.57, "step": 3405 }, { "epoch": 1.1608725289706885, "grad_norm": 15.377154919196006, "learning_rate": 7.697058934508743e-06, "loss": 1.8119, "step": 3406 }, { "epoch": 1.1612133605998636, "grad_norm": 17.607149835689807, "learning_rate": 7.695388886970979e-06, "loss": 2.2873, "step": 3407 }, { "epoch": 1.1615541922290389, "grad_norm": 14.985937145461904, "learning_rate": 7.69371841543781e-06, "loss": 2.5539, "step": 3408 }, { "epoch": 1.161895023858214, "grad_norm": 19.72346597645711, "learning_rate": 7.692047520172014e-06, "loss": 2.1347, "step": 3409 }, { "epoch": 1.1622358554873893, "grad_norm": 16.392937516017952, "learning_rate": 7.690376201436427e-06, "loss": 1.8872, "step": 3410 }, { "epoch": 1.1625766871165644, "grad_norm": 18.411805358276542, "learning_rate": 7.688704459493951e-06, "loss": 2.3606, "step": 3411 }, { "epoch": 1.1629175187457397, "grad_norm": 15.575856750297906, "learning_rate": 7.687032294607561e-06, "loss": 1.7731, "step": 3412 }, { "epoch": 1.1632583503749148, "grad_norm": 17.095945003572407, "learning_rate": 7.685359707040296e-06, "loss": 2.252, "step": 3413 }, { "epoch": 1.16359918200409, "grad_norm": 17.244386775196546, "learning_rate": 7.683686697055256e-06, "loss": 2.1421, "step": 3414 }, { "epoch": 1.1639400136332652, "grad_norm": 20.829577947213316, "learning_rate": 7.68201326491562e-06, "loss": 1.7814, "step": 3415 }, { "epoch": 1.1642808452624402, "grad_norm": 18.960435020596222, "learning_rate": 7.680339410884619e-06, "loss": 2.1998, "step": 3416 }, { "epoch": 1.1646216768916156, "grad_norm": 13.268958142318443, "learning_rate": 7.678665135225558e-06, "loss": 2.5912, "step": 3417 }, { "epoch": 1.1649625085207906, "grad_norm": 17.094918093522853, "learning_rate": 7.676990438201808e-06, "loss": 2.2172, "step": 3418 }, { "epoch": 1.165303340149966, "grad_norm": 26.658287165110018, "learning_rate": 7.675315320076809e-06, "loss": 2.4682, "step": 3419 }, { "epoch": 1.165644171779141, "grad_norm": 15.486209747800526, "learning_rate": 7.673639781114055e-06, "loss": 2.6389, "step": 3420 }, { "epoch": 1.1659850034083163, "grad_norm": 20.69204977197211, "learning_rate": 7.671963821577123e-06, "loss": 2.3443, "step": 3421 }, { "epoch": 1.1663258350374914, "grad_norm": 16.01857032250532, "learning_rate": 7.670287441729647e-06, "loss": 2.1366, "step": 3422 }, { "epoch": 1.1666666666666667, "grad_norm": 18.80879519637838, "learning_rate": 7.668610641835325e-06, "loss": 1.8906, "step": 3423 }, { "epoch": 1.1670074982958418, "grad_norm": 16.27658153006072, "learning_rate": 7.666933422157926e-06, "loss": 2.2458, "step": 3424 }, { "epoch": 1.1673483299250171, "grad_norm": 24.01513884687653, "learning_rate": 7.665255782961282e-06, "loss": 2.1456, "step": 3425 }, { "epoch": 1.1676891615541922, "grad_norm": 13.952236201548029, "learning_rate": 7.663577724509295e-06, "loss": 2.3332, "step": 3426 }, { "epoch": 1.1680299931833673, "grad_norm": 16.895645287690993, "learning_rate": 7.661899247065929e-06, "loss": 2.5937, "step": 3427 }, { "epoch": 1.1683708248125426, "grad_norm": 11.108179074757164, "learning_rate": 7.660220350895216e-06, "loss": 1.634, "step": 3428 }, { "epoch": 1.1687116564417177, "grad_norm": 12.698037047350054, "learning_rate": 7.658541036261248e-06, "loss": 1.8058, "step": 3429 }, { "epoch": 1.169052488070893, "grad_norm": 14.551575941801643, "learning_rate": 7.656861303428196e-06, "loss": 2.3621, "step": 3430 }, { "epoch": 1.169393319700068, "grad_norm": 14.521570394262133, "learning_rate": 7.655181152660283e-06, "loss": 2.091, "step": 3431 }, { "epoch": 1.1697341513292434, "grad_norm": 18.76161708598552, "learning_rate": 7.653500584221804e-06, "loss": 2.2979, "step": 3432 }, { "epoch": 1.1700749829584185, "grad_norm": 16.98365152451555, "learning_rate": 7.651819598377122e-06, "loss": 3.2343, "step": 3433 }, { "epoch": 1.1704158145875938, "grad_norm": 31.006627607798507, "learning_rate": 7.65013819539066e-06, "loss": 2.4609, "step": 3434 }, { "epoch": 1.170756646216769, "grad_norm": 40.630471870113595, "learning_rate": 7.648456375526912e-06, "loss": 2.3612, "step": 3435 }, { "epoch": 1.1710974778459442, "grad_norm": 16.226210703905597, "learning_rate": 7.646774139050433e-06, "loss": 1.9699, "step": 3436 }, { "epoch": 1.1714383094751193, "grad_norm": 20.227294055544572, "learning_rate": 7.645091486225844e-06, "loss": 2.5663, "step": 3437 }, { "epoch": 1.1717791411042944, "grad_norm": 16.20322603923074, "learning_rate": 7.643408417317833e-06, "loss": 2.2977, "step": 3438 }, { "epoch": 1.1721199727334697, "grad_norm": 14.854843781940165, "learning_rate": 7.64172493259116e-06, "loss": 2.5916, "step": 3439 }, { "epoch": 1.1724608043626448, "grad_norm": 16.29702578753333, "learning_rate": 7.640041032310634e-06, "loss": 1.6729, "step": 3440 }, { "epoch": 1.17280163599182, "grad_norm": 19.534156049314966, "learning_rate": 7.63835671674115e-06, "loss": 1.9279, "step": 3441 }, { "epoch": 1.1731424676209952, "grad_norm": 18.968088419834693, "learning_rate": 7.636671986147646e-06, "loss": 2.3098, "step": 3442 }, { "epoch": 1.1734832992501705, "grad_norm": 16.630196106036905, "learning_rate": 7.634986840795145e-06, "loss": 2.2328, "step": 3443 }, { "epoch": 1.1738241308793456, "grad_norm": 14.9746838328074, "learning_rate": 7.633301280948726e-06, "loss": 2.209, "step": 3444 }, { "epoch": 1.1741649625085209, "grad_norm": 16.314422963903528, "learning_rate": 7.63161530687353e-06, "loss": 2.653, "step": 3445 }, { "epoch": 1.174505794137696, "grad_norm": 13.613653704986504, "learning_rate": 7.629928918834773e-06, "loss": 2.1704, "step": 3446 }, { "epoch": 1.1748466257668713, "grad_norm": 14.778198186893983, "learning_rate": 7.628242117097726e-06, "loss": 2.4202, "step": 3447 }, { "epoch": 1.1751874573960464, "grad_norm": 19.452591408056715, "learning_rate": 7.626554901927733e-06, "loss": 2.0892, "step": 3448 }, { "epoch": 1.1755282890252214, "grad_norm": 15.84077361299141, "learning_rate": 7.624867273590197e-06, "loss": 2.3837, "step": 3449 }, { "epoch": 1.1758691206543967, "grad_norm": 19.112587468243554, "learning_rate": 7.6231792323505905e-06, "loss": 2.3467, "step": 3450 }, { "epoch": 1.1762099522835718, "grad_norm": 19.397293979358363, "learning_rate": 7.621490778474449e-06, "loss": 2.611, "step": 3451 }, { "epoch": 1.1765507839127471, "grad_norm": 19.610323334801798, "learning_rate": 7.619801912227371e-06, "loss": 2.1823, "step": 3452 }, { "epoch": 1.1768916155419222, "grad_norm": 18.76558405734609, "learning_rate": 7.618112633875025e-06, "loss": 2.0959, "step": 3453 }, { "epoch": 1.1772324471710975, "grad_norm": 14.953699537050474, "learning_rate": 7.616422943683139e-06, "loss": 2.6041, "step": 3454 }, { "epoch": 1.1775732788002726, "grad_norm": 15.151714738165927, "learning_rate": 7.61473284191751e-06, "loss": 2.2419, "step": 3455 }, { "epoch": 1.177914110429448, "grad_norm": 22.50036755961169, "learning_rate": 7.613042328843994e-06, "loss": 2.8255, "step": 3456 }, { "epoch": 1.178254942058623, "grad_norm": 18.6934667462161, "learning_rate": 7.611351404728519e-06, "loss": 2.2603, "step": 3457 }, { "epoch": 1.1785957736877983, "grad_norm": 14.762762997775798, "learning_rate": 7.609660069837073e-06, "loss": 1.7921, "step": 3458 }, { "epoch": 1.1789366053169734, "grad_norm": 17.097164622593485, "learning_rate": 7.607968324435708e-06, "loss": 2.6404, "step": 3459 }, { "epoch": 1.1792774369461485, "grad_norm": 26.073557694704736, "learning_rate": 7.6062761687905455e-06, "loss": 2.2309, "step": 3460 }, { "epoch": 1.1796182685753238, "grad_norm": 16.157653947715477, "learning_rate": 7.604583603167766e-06, "loss": 2.1912, "step": 3461 }, { "epoch": 1.179959100204499, "grad_norm": 13.294126846034418, "learning_rate": 7.602890627833618e-06, "loss": 2.408, "step": 3462 }, { "epoch": 1.1802999318336742, "grad_norm": 12.93306572507711, "learning_rate": 7.601197243054411e-06, "loss": 2.4086, "step": 3463 }, { "epoch": 1.1806407634628493, "grad_norm": 13.442976883451449, "learning_rate": 7.599503449096526e-06, "loss": 2.4956, "step": 3464 }, { "epoch": 1.1809815950920246, "grad_norm": 14.37042108451778, "learning_rate": 7.597809246226398e-06, "loss": 2.5281, "step": 3465 }, { "epoch": 1.1813224267211997, "grad_norm": 15.29923013691024, "learning_rate": 7.596114634710534e-06, "loss": 1.8084, "step": 3466 }, { "epoch": 1.181663258350375, "grad_norm": 16.542785688982672, "learning_rate": 7.5944196148155046e-06, "loss": 2.3021, "step": 3467 }, { "epoch": 1.18200408997955, "grad_norm": 29.492018360272354, "learning_rate": 7.592724186807941e-06, "loss": 1.8462, "step": 3468 }, { "epoch": 1.1823449216087254, "grad_norm": 16.49800962249054, "learning_rate": 7.591028350954544e-06, "loss": 1.3855, "step": 3469 }, { "epoch": 1.1826857532379005, "grad_norm": 18.50151909277116, "learning_rate": 7.589332107522071e-06, "loss": 2.2691, "step": 3470 }, { "epoch": 1.1830265848670756, "grad_norm": 16.582138227934575, "learning_rate": 7.587635456777352e-06, "loss": 2.6979, "step": 3471 }, { "epoch": 1.1833674164962509, "grad_norm": 10.106485964794352, "learning_rate": 7.585938398987271e-06, "loss": 1.6698, "step": 3472 }, { "epoch": 1.183708248125426, "grad_norm": 19.7502523266852, "learning_rate": 7.58424093441879e-06, "loss": 2.718, "step": 3473 }, { "epoch": 1.1840490797546013, "grad_norm": 21.747719715611176, "learning_rate": 7.582543063338921e-06, "loss": 2.4786, "step": 3474 }, { "epoch": 1.1843899113837764, "grad_norm": 15.2404388449691, "learning_rate": 7.580844786014747e-06, "loss": 2.5842, "step": 3475 }, { "epoch": 1.1847307430129517, "grad_norm": 15.633919224149883, "learning_rate": 7.579146102713415e-06, "loss": 2.5067, "step": 3476 }, { "epoch": 1.1850715746421268, "grad_norm": 18.539120473212122, "learning_rate": 7.577447013702134e-06, "loss": 2.2343, "step": 3477 }, { "epoch": 1.185412406271302, "grad_norm": 17.447620857011277, "learning_rate": 7.5757475192481775e-06, "loss": 2.5094, "step": 3478 }, { "epoch": 1.1857532379004772, "grad_norm": 15.063753855306148, "learning_rate": 7.574047619618883e-06, "loss": 2.7539, "step": 3479 }, { "epoch": 1.1860940695296525, "grad_norm": 20.61940711644142, "learning_rate": 7.572347315081651e-06, "loss": 2.2919, "step": 3480 }, { "epoch": 1.1864349011588275, "grad_norm": 13.07162816423925, "learning_rate": 7.570646605903946e-06, "loss": 2.5554, "step": 3481 }, { "epoch": 1.1867757327880026, "grad_norm": 13.773736762052865, "learning_rate": 7.568945492353298e-06, "loss": 1.9924, "step": 3482 }, { "epoch": 1.187116564417178, "grad_norm": 21.899165872731015, "learning_rate": 7.567243974697297e-06, "loss": 2.272, "step": 3483 }, { "epoch": 1.187457396046353, "grad_norm": 17.739357909718006, "learning_rate": 7.5655420532035986e-06, "loss": 2.469, "step": 3484 }, { "epoch": 1.1877982276755283, "grad_norm": 14.158885780186182, "learning_rate": 7.5638397281399235e-06, "loss": 2.2376, "step": 3485 }, { "epoch": 1.1881390593047034, "grad_norm": 19.128590764027564, "learning_rate": 7.562136999774053e-06, "loss": 1.658, "step": 3486 }, { "epoch": 1.1884798909338787, "grad_norm": 14.202242255225126, "learning_rate": 7.560433868373833e-06, "loss": 2.9048, "step": 3487 }, { "epoch": 1.1888207225630538, "grad_norm": 15.492488452833802, "learning_rate": 7.558730334207172e-06, "loss": 2.22, "step": 3488 }, { "epoch": 1.1891615541922291, "grad_norm": 17.351415090181845, "learning_rate": 7.557026397542044e-06, "loss": 2.6243, "step": 3489 }, { "epoch": 1.1895023858214042, "grad_norm": 17.71521766015105, "learning_rate": 7.555322058646486e-06, "loss": 2.2206, "step": 3490 }, { "epoch": 1.1898432174505795, "grad_norm": 14.930696113850889, "learning_rate": 7.553617317788595e-06, "loss": 2.0301, "step": 3491 }, { "epoch": 1.1901840490797546, "grad_norm": 16.163062023195884, "learning_rate": 7.551912175236533e-06, "loss": 2.4345, "step": 3492 }, { "epoch": 1.1905248807089297, "grad_norm": 19.363770558920987, "learning_rate": 7.55020663125853e-06, "loss": 1.8443, "step": 3493 }, { "epoch": 1.190865712338105, "grad_norm": 18.827202837121124, "learning_rate": 7.5485006861228706e-06, "loss": 2.4356, "step": 3494 }, { "epoch": 1.19120654396728, "grad_norm": 13.827659993959035, "learning_rate": 7.546794340097909e-06, "loss": 2.4537, "step": 3495 }, { "epoch": 1.1915473755964554, "grad_norm": 21.1689276759338, "learning_rate": 7.545087593452061e-06, "loss": 1.9085, "step": 3496 }, { "epoch": 1.1918882072256305, "grad_norm": 18.274134830929608, "learning_rate": 7.5433804464538e-06, "loss": 2.735, "step": 3497 }, { "epoch": 1.1922290388548058, "grad_norm": 512.2358164496825, "learning_rate": 7.541672899371672e-06, "loss": 2.1946, "step": 3498 }, { "epoch": 1.1925698704839809, "grad_norm": 13.868302182733672, "learning_rate": 7.539964952474279e-06, "loss": 2.1872, "step": 3499 }, { "epoch": 1.1929107021131562, "grad_norm": 15.590274893294607, "learning_rate": 7.538256606030287e-06, "loss": 2.104, "step": 3500 }, { "epoch": 1.1932515337423313, "grad_norm": 17.775128656643314, "learning_rate": 7.536547860308428e-06, "loss": 2.1162, "step": 3501 }, { "epoch": 1.1935923653715066, "grad_norm": 14.211993104134171, "learning_rate": 7.534838715577491e-06, "loss": 1.9346, "step": 3502 }, { "epoch": 1.1939331970006817, "grad_norm": 14.916928468703803, "learning_rate": 7.533129172106335e-06, "loss": 2.4239, "step": 3503 }, { "epoch": 1.1942740286298568, "grad_norm": 16.080622769918858, "learning_rate": 7.531419230163876e-06, "loss": 2.3989, "step": 3504 }, { "epoch": 1.194614860259032, "grad_norm": 16.338626271468147, "learning_rate": 7.529708890019096e-06, "loss": 2.0289, "step": 3505 }, { "epoch": 1.1949556918882072, "grad_norm": 14.813461801271133, "learning_rate": 7.527998151941037e-06, "loss": 2.511, "step": 3506 }, { "epoch": 1.1952965235173825, "grad_norm": 12.28832208796034, "learning_rate": 7.526287016198805e-06, "loss": 1.8742, "step": 3507 }, { "epoch": 1.1956373551465576, "grad_norm": 21.62545993358652, "learning_rate": 7.524575483061569e-06, "loss": 2.4573, "step": 3508 }, { "epoch": 1.1959781867757329, "grad_norm": 14.123824505739906, "learning_rate": 7.52286355279856e-06, "loss": 2.0966, "step": 3509 }, { "epoch": 1.196319018404908, "grad_norm": 14.862403428297199, "learning_rate": 7.521151225679071e-06, "loss": 2.3123, "step": 3510 }, { "epoch": 1.196659850034083, "grad_norm": 22.112961896284595, "learning_rate": 7.519438501972458e-06, "loss": 2.3274, "step": 3511 }, { "epoch": 1.1970006816632583, "grad_norm": 15.942780992008942, "learning_rate": 7.5177253819481396e-06, "loss": 2.4161, "step": 3512 }, { "epoch": 1.1973415132924337, "grad_norm": 18.485970328902294, "learning_rate": 7.5160118658755964e-06, "loss": 2.0957, "step": 3513 }, { "epoch": 1.1976823449216087, "grad_norm": 17.268528589898256, "learning_rate": 7.5142979540243685e-06, "loss": 2.0666, "step": 3514 }, { "epoch": 1.1980231765507838, "grad_norm": 15.542683061783723, "learning_rate": 7.512583646664067e-06, "loss": 2.2258, "step": 3515 }, { "epoch": 1.1983640081799591, "grad_norm": 15.588339621167648, "learning_rate": 7.510868944064354e-06, "loss": 2.1227, "step": 3516 }, { "epoch": 1.1987048398091342, "grad_norm": 41.67270728093458, "learning_rate": 7.509153846494961e-06, "loss": 2.3, "step": 3517 }, { "epoch": 1.1990456714383095, "grad_norm": 17.54488124921181, "learning_rate": 7.50743835422568e-06, "loss": 2.373, "step": 3518 }, { "epoch": 1.1993865030674846, "grad_norm": 16.313485615169153, "learning_rate": 7.505722467526364e-06, "loss": 2.1935, "step": 3519 }, { "epoch": 1.19972733469666, "grad_norm": 13.0819249225591, "learning_rate": 7.50400618666693e-06, "loss": 1.9396, "step": 3520 }, { "epoch": 1.200068166325835, "grad_norm": 18.89921947599337, "learning_rate": 7.502289511917355e-06, "loss": 2.2365, "step": 3521 }, { "epoch": 1.20040899795501, "grad_norm": 16.189927551102976, "learning_rate": 7.500572443547677e-06, "loss": 1.8924, "step": 3522 }, { "epoch": 1.2007498295841854, "grad_norm": 14.195569715103849, "learning_rate": 7.498854981828001e-06, "loss": 2.3932, "step": 3523 }, { "epoch": 1.2010906612133607, "grad_norm": 17.267910079188276, "learning_rate": 7.4971371270284885e-06, "loss": 2.4188, "step": 3524 }, { "epoch": 1.2014314928425358, "grad_norm": 94.83606544723433, "learning_rate": 7.495418879419365e-06, "loss": 2.5374, "step": 3525 }, { "epoch": 1.201772324471711, "grad_norm": 18.84020120230579, "learning_rate": 7.493700239270919e-06, "loss": 2.4035, "step": 3526 }, { "epoch": 1.2021131561008862, "grad_norm": 24.47806679942933, "learning_rate": 7.491981206853498e-06, "loss": 2.1002, "step": 3527 }, { "epoch": 1.2024539877300613, "grad_norm": 14.287761051079594, "learning_rate": 7.4902617824375134e-06, "loss": 2.2792, "step": 3528 }, { "epoch": 1.2027948193592366, "grad_norm": 20.46669042358702, "learning_rate": 7.488541966293438e-06, "loss": 2.1284, "step": 3529 }, { "epoch": 1.2031356509884117, "grad_norm": 13.302351299017708, "learning_rate": 7.4868217586918036e-06, "loss": 2.4003, "step": 3530 }, { "epoch": 1.203476482617587, "grad_norm": 24.480064446251045, "learning_rate": 7.485101159903207e-06, "loss": 2.0672, "step": 3531 }, { "epoch": 1.203817314246762, "grad_norm": 34.68355908976286, "learning_rate": 7.4833801701983066e-06, "loss": 2.2491, "step": 3532 }, { "epoch": 1.2041581458759372, "grad_norm": 18.179813543348498, "learning_rate": 7.481658789847819e-06, "loss": 2.0346, "step": 3533 }, { "epoch": 1.2044989775051125, "grad_norm": 15.267720903621887, "learning_rate": 7.479937019122524e-06, "loss": 2.4428, "step": 3534 }, { "epoch": 1.2048398091342876, "grad_norm": 17.243204896623645, "learning_rate": 7.478214858293264e-06, "loss": 2.0495, "step": 3535 }, { "epoch": 1.2051806407634629, "grad_norm": 13.933753906527432, "learning_rate": 7.476492307630943e-06, "loss": 2.0939, "step": 3536 }, { "epoch": 1.205521472392638, "grad_norm": 19.04749311815318, "learning_rate": 7.474769367406524e-06, "loss": 2.2846, "step": 3537 }, { "epoch": 1.2058623040218133, "grad_norm": 20.264200700466226, "learning_rate": 7.47304603789103e-06, "loss": 2.1143, "step": 3538 }, { "epoch": 1.2062031356509884, "grad_norm": 28.577205650221806, "learning_rate": 7.47132231935555e-06, "loss": 2.7169, "step": 3539 }, { "epoch": 1.2065439672801637, "grad_norm": 16.47604754668848, "learning_rate": 7.4695982120712315e-06, "loss": 1.8882, "step": 3540 }, { "epoch": 1.2068847989093388, "grad_norm": 14.210003805229501, "learning_rate": 7.467873716309284e-06, "loss": 2.1854, "step": 3541 }, { "epoch": 1.207225630538514, "grad_norm": 16.27891176840281, "learning_rate": 7.466148832340977e-06, "loss": 2.5056, "step": 3542 }, { "epoch": 1.2075664621676891, "grad_norm": 17.821009301867054, "learning_rate": 7.4644235604376416e-06, "loss": 2.2501, "step": 3543 }, { "epoch": 1.2079072937968642, "grad_norm": 20.500257986162847, "learning_rate": 7.462697900870668e-06, "loss": 2.7542, "step": 3544 }, { "epoch": 1.2082481254260395, "grad_norm": 13.582886360699128, "learning_rate": 7.460971853911513e-06, "loss": 2.2556, "step": 3545 }, { "epoch": 1.2085889570552146, "grad_norm": 15.78654101357696, "learning_rate": 7.459245419831689e-06, "loss": 2.5435, "step": 3546 }, { "epoch": 1.20892978868439, "grad_norm": 23.048750222946015, "learning_rate": 7.4575185989027695e-06, "loss": 2.1116, "step": 3547 }, { "epoch": 1.209270620313565, "grad_norm": 15.515721714143368, "learning_rate": 7.4557913913963916e-06, "loss": 2.6657, "step": 3548 }, { "epoch": 1.2096114519427403, "grad_norm": 10.832170245095888, "learning_rate": 7.454063797584253e-06, "loss": 2.1862, "step": 3549 }, { "epoch": 1.2099522835719154, "grad_norm": 18.582673382198987, "learning_rate": 7.45233581773811e-06, "loss": 3.2494, "step": 3550 }, { "epoch": 1.2102931152010907, "grad_norm": 10.861338513620241, "learning_rate": 7.45060745212978e-06, "loss": 2.0246, "step": 3551 }, { "epoch": 1.2106339468302658, "grad_norm": 17.498058943391214, "learning_rate": 7.4488787010311425e-06, "loss": 2.3863, "step": 3552 }, { "epoch": 1.2109747784594411, "grad_norm": 28.044141153900842, "learning_rate": 7.447149564714137e-06, "loss": 2.0357, "step": 3553 }, { "epoch": 1.2113156100886162, "grad_norm": 13.903379749251021, "learning_rate": 7.445420043450764e-06, "loss": 2.819, "step": 3554 }, { "epoch": 1.2116564417177913, "grad_norm": 17.04732605546652, "learning_rate": 7.443690137513083e-06, "loss": 2.414, "step": 3555 }, { "epoch": 1.2119972733469666, "grad_norm": 17.554506437243358, "learning_rate": 7.441959847173215e-06, "loss": 2.2809, "step": 3556 }, { "epoch": 1.2123381049761417, "grad_norm": 19.633217608871956, "learning_rate": 7.440229172703344e-06, "loss": 2.4076, "step": 3557 }, { "epoch": 1.212678936605317, "grad_norm": 18.132705488540648, "learning_rate": 7.438498114375707e-06, "loss": 2.4662, "step": 3558 }, { "epoch": 1.213019768234492, "grad_norm": 14.304748730703931, "learning_rate": 7.436766672462612e-06, "loss": 1.9257, "step": 3559 }, { "epoch": 1.2133605998636674, "grad_norm": 22.10002847812441, "learning_rate": 7.435034847236419e-06, "loss": 2.5424, "step": 3560 }, { "epoch": 1.2137014314928425, "grad_norm": 16.60651224361225, "learning_rate": 7.43330263896955e-06, "loss": 2.518, "step": 3561 }, { "epoch": 1.2140422631220178, "grad_norm": 15.743519571927683, "learning_rate": 7.431570047934491e-06, "loss": 1.7291, "step": 3562 }, { "epoch": 1.2143830947511929, "grad_norm": 15.163739235112855, "learning_rate": 7.429837074403784e-06, "loss": 1.9598, "step": 3563 }, { "epoch": 1.2147239263803682, "grad_norm": 10.663582874639896, "learning_rate": 7.42810371865003e-06, "loss": 2.0449, "step": 3564 }, { "epoch": 1.2150647580095433, "grad_norm": 12.569432341045202, "learning_rate": 7.4263699809458975e-06, "loss": 1.4582, "step": 3565 }, { "epoch": 1.2154055896387184, "grad_norm": 14.032354167235962, "learning_rate": 7.424635861564109e-06, "loss": 2.3405, "step": 3566 }, { "epoch": 1.2157464212678937, "grad_norm": 15.933161410948284, "learning_rate": 7.422901360777446e-06, "loss": 2.526, "step": 3567 }, { "epoch": 1.2160872528970688, "grad_norm": 18.96289055351101, "learning_rate": 7.421166478858754e-06, "loss": 2.383, "step": 3568 }, { "epoch": 1.216428084526244, "grad_norm": 13.634793147707958, "learning_rate": 7.419431216080937e-06, "loss": 2.2559, "step": 3569 }, { "epoch": 1.2167689161554192, "grad_norm": 19.176283877738257, "learning_rate": 7.417695572716959e-06, "loss": 2.4939, "step": 3570 }, { "epoch": 1.2171097477845945, "grad_norm": 14.145313757268626, "learning_rate": 7.415959549039843e-06, "loss": 2.2575, "step": 3571 }, { "epoch": 1.2174505794137696, "grad_norm": 13.506269090606859, "learning_rate": 7.414223145322672e-06, "loss": 2.4066, "step": 3572 }, { "epoch": 1.2177914110429449, "grad_norm": 16.208544855207588, "learning_rate": 7.412486361838589e-06, "loss": 2.2495, "step": 3573 }, { "epoch": 1.21813224267212, "grad_norm": 17.00047808621785, "learning_rate": 7.410749198860801e-06, "loss": 2.7603, "step": 3574 }, { "epoch": 1.2184730743012953, "grad_norm": 16.77865054907014, "learning_rate": 7.409011656662563e-06, "loss": 2.4175, "step": 3575 }, { "epoch": 1.2188139059304703, "grad_norm": 18.522825891218556, "learning_rate": 7.407273735517205e-06, "loss": 2.6185, "step": 3576 }, { "epoch": 1.2191547375596454, "grad_norm": 25.719382641282785, "learning_rate": 7.405535435698104e-06, "loss": 2.8405, "step": 3577 }, { "epoch": 1.2194955691888207, "grad_norm": 33.16608760979264, "learning_rate": 7.4037967574787025e-06, "loss": 2.2128, "step": 3578 }, { "epoch": 1.2198364008179958, "grad_norm": 11.372800386115012, "learning_rate": 7.4020577011325035e-06, "loss": 2.0253, "step": 3579 }, { "epoch": 1.2201772324471711, "grad_norm": 22.366444820514957, "learning_rate": 7.400318266933066e-06, "loss": 1.5555, "step": 3580 }, { "epoch": 1.2205180640763462, "grad_norm": 29.399124022101848, "learning_rate": 7.398578455154009e-06, "loss": 2.4454, "step": 3581 }, { "epoch": 1.2208588957055215, "grad_norm": 27.562563264581122, "learning_rate": 7.396838266069014e-06, "loss": 2.5242, "step": 3582 }, { "epoch": 1.2211997273346966, "grad_norm": 13.891928276533982, "learning_rate": 7.3950976999518185e-06, "loss": 2.4913, "step": 3583 }, { "epoch": 1.221540558963872, "grad_norm": 16.024094939996104, "learning_rate": 7.39335675707622e-06, "loss": 2.317, "step": 3584 }, { "epoch": 1.221881390593047, "grad_norm": 17.505714302371455, "learning_rate": 7.391615437716076e-06, "loss": 2.4136, "step": 3585 }, { "epoch": 1.2222222222222223, "grad_norm": 16.343953020760413, "learning_rate": 7.3898737421453034e-06, "loss": 1.8229, "step": 3586 }, { "epoch": 1.2225630538513974, "grad_norm": 16.97690128784145, "learning_rate": 7.388131670637878e-06, "loss": 2.1067, "step": 3587 }, { "epoch": 1.2229038854805725, "grad_norm": 14.864053574854012, "learning_rate": 7.386389223467835e-06, "loss": 2.4956, "step": 3588 }, { "epoch": 1.2232447171097478, "grad_norm": 14.119579036689775, "learning_rate": 7.3846464009092655e-06, "loss": 1.7231, "step": 3589 }, { "epoch": 1.223585548738923, "grad_norm": 16.314088045715625, "learning_rate": 7.382903203236325e-06, "loss": 2.5451, "step": 3590 }, { "epoch": 1.2239263803680982, "grad_norm": 19.38342666966621, "learning_rate": 7.381159630723225e-06, "loss": 2.8963, "step": 3591 }, { "epoch": 1.2242672119972733, "grad_norm": 15.487674623972373, "learning_rate": 7.379415683644236e-06, "loss": 2.1022, "step": 3592 }, { "epoch": 1.2246080436264486, "grad_norm": 17.92195627423995, "learning_rate": 7.377671362273689e-06, "loss": 2.1033, "step": 3593 }, { "epoch": 1.2249488752556237, "grad_norm": 16.262260137133545, "learning_rate": 7.375926666885971e-06, "loss": 1.546, "step": 3594 }, { "epoch": 1.225289706884799, "grad_norm": 15.89408210170526, "learning_rate": 7.374181597755529e-06, "loss": 2.183, "step": 3595 }, { "epoch": 1.225630538513974, "grad_norm": 18.469562931029316, "learning_rate": 7.372436155156872e-06, "loss": 2.0773, "step": 3596 }, { "epoch": 1.2259713701431494, "grad_norm": 17.12863860000587, "learning_rate": 7.370690339364565e-06, "loss": 2.6311, "step": 3597 }, { "epoch": 1.2263122017723245, "grad_norm": 15.775235916227796, "learning_rate": 7.368944150653228e-06, "loss": 2.8052, "step": 3598 }, { "epoch": 1.2266530334014996, "grad_norm": 13.794288200412472, "learning_rate": 7.367197589297547e-06, "loss": 2.398, "step": 3599 }, { "epoch": 1.2269938650306749, "grad_norm": 19.147697773223854, "learning_rate": 7.365450655572262e-06, "loss": 2.8047, "step": 3600 }, { "epoch": 1.22733469665985, "grad_norm": 15.751517694515401, "learning_rate": 7.3637033497521725e-06, "loss": 2.2046, "step": 3601 }, { "epoch": 1.2276755282890253, "grad_norm": 24.154804313763915, "learning_rate": 7.361955672112136e-06, "loss": 2.0774, "step": 3602 }, { "epoch": 1.2280163599182004, "grad_norm": 15.546371046546769, "learning_rate": 7.360207622927071e-06, "loss": 2.0272, "step": 3603 }, { "epoch": 1.2283571915473757, "grad_norm": 25.426513864822095, "learning_rate": 7.358459202471951e-06, "loss": 2.7677, "step": 3604 }, { "epoch": 1.2286980231765507, "grad_norm": 17.418597950357185, "learning_rate": 7.35671041102181e-06, "loss": 1.7659, "step": 3605 }, { "epoch": 1.229038854805726, "grad_norm": 18.729767833124114, "learning_rate": 7.35496124885174e-06, "loss": 2.4308, "step": 3606 }, { "epoch": 1.2293796864349011, "grad_norm": 18.305064868506342, "learning_rate": 7.35321171623689e-06, "loss": 2.0991, "step": 3607 }, { "epoch": 1.2297205180640765, "grad_norm": 15.373779513110566, "learning_rate": 7.3514618134524716e-06, "loss": 2.54, "step": 3608 }, { "epoch": 1.2300613496932515, "grad_norm": 19.259790957542098, "learning_rate": 7.349711540773748e-06, "loss": 2.1264, "step": 3609 }, { "epoch": 1.2304021813224266, "grad_norm": 21.47896800489412, "learning_rate": 7.347960898476045e-06, "loss": 2.781, "step": 3610 }, { "epoch": 1.230743012951602, "grad_norm": 15.161500767014147, "learning_rate": 7.346209886834746e-06, "loss": 2.5495, "step": 3611 }, { "epoch": 1.231083844580777, "grad_norm": 13.889952071557646, "learning_rate": 7.344458506125293e-06, "loss": 1.8115, "step": 3612 }, { "epoch": 1.2314246762099523, "grad_norm": 24.16475273179867, "learning_rate": 7.342706756623183e-06, "loss": 2.4175, "step": 3613 }, { "epoch": 1.2317655078391274, "grad_norm": 34.21255014185103, "learning_rate": 7.340954638603976e-06, "loss": 2.4033, "step": 3614 }, { "epoch": 1.2321063394683027, "grad_norm": 19.938915763709094, "learning_rate": 7.339202152343283e-06, "loss": 1.8758, "step": 3615 }, { "epoch": 1.2324471710974778, "grad_norm": 26.12016810456602, "learning_rate": 7.3374492981167814e-06, "loss": 2.5257, "step": 3616 }, { "epoch": 1.2327880027266531, "grad_norm": 19.1147483744932, "learning_rate": 7.3356960762002e-06, "loss": 2.1305, "step": 3617 }, { "epoch": 1.2331288343558282, "grad_norm": 14.251846406810511, "learning_rate": 7.333942486869329e-06, "loss": 2.1164, "step": 3618 }, { "epoch": 1.2334696659850035, "grad_norm": 22.259691582331016, "learning_rate": 7.3321885304000126e-06, "loss": 2.3459, "step": 3619 }, { "epoch": 1.2338104976141786, "grad_norm": 17.013063049768054, "learning_rate": 7.330434207068157e-06, "loss": 2.4156, "step": 3620 }, { "epoch": 1.2341513292433537, "grad_norm": 14.26328016968089, "learning_rate": 7.328679517149724e-06, "loss": 2.1003, "step": 3621 }, { "epoch": 1.234492160872529, "grad_norm": 22.956007543878307, "learning_rate": 7.326924460920733e-06, "loss": 2.0122, "step": 3622 }, { "epoch": 1.234832992501704, "grad_norm": 12.334238676172612, "learning_rate": 7.325169038657261e-06, "loss": 2.1148, "step": 3623 }, { "epoch": 1.2351738241308794, "grad_norm": 15.49345597405138, "learning_rate": 7.323413250635441e-06, "loss": 2.6483, "step": 3624 }, { "epoch": 1.2355146557600545, "grad_norm": 17.65315424438424, "learning_rate": 7.321657097131472e-06, "loss": 2.1299, "step": 3625 }, { "epoch": 1.2358554873892298, "grad_norm": 17.51455354817194, "learning_rate": 7.319900578421597e-06, "loss": 2.6466, "step": 3626 }, { "epoch": 1.2361963190184049, "grad_norm": 19.88197306189632, "learning_rate": 7.318143694782127e-06, "loss": 1.8308, "step": 3627 }, { "epoch": 1.2365371506475802, "grad_norm": 16.856828636480945, "learning_rate": 7.316386446489425e-06, "loss": 2.04, "step": 3628 }, { "epoch": 1.2368779822767553, "grad_norm": 10.56997087841179, "learning_rate": 7.3146288338199145e-06, "loss": 2.2035, "step": 3629 }, { "epoch": 1.2372188139059306, "grad_norm": 16.899634874085223, "learning_rate": 7.312870857050074e-06, "loss": 2.4581, "step": 3630 }, { "epoch": 1.2375596455351057, "grad_norm": 13.313648804076688, "learning_rate": 7.311112516456443e-06, "loss": 1.9576, "step": 3631 }, { "epoch": 1.2379004771642808, "grad_norm": 17.75099127621394, "learning_rate": 7.309353812315609e-06, "loss": 2.0217, "step": 3632 }, { "epoch": 1.238241308793456, "grad_norm": 14.656985683641402, "learning_rate": 7.307594744904229e-06, "loss": 2.2107, "step": 3633 }, { "epoch": 1.2385821404226312, "grad_norm": 13.06548540717677, "learning_rate": 7.305835314499011e-06, "loss": 2.2674, "step": 3634 }, { "epoch": 1.2389229720518065, "grad_norm": 21.397860634460624, "learning_rate": 7.304075521376718e-06, "loss": 2.08, "step": 3635 }, { "epoch": 1.2392638036809815, "grad_norm": 12.566087373723544, "learning_rate": 7.302315365814173e-06, "loss": 1.7831, "step": 3636 }, { "epoch": 1.2396046353101569, "grad_norm": 33.1896651854774, "learning_rate": 7.300554848088257e-06, "loss": 2.5044, "step": 3637 }, { "epoch": 1.239945466939332, "grad_norm": 23.74768981833149, "learning_rate": 7.298793968475904e-06, "loss": 2.312, "step": 3638 }, { "epoch": 1.2402862985685073, "grad_norm": 15.188595188232458, "learning_rate": 7.297032727254112e-06, "loss": 2.6253, "step": 3639 }, { "epoch": 1.2406271301976823, "grad_norm": 14.39876554462012, "learning_rate": 7.295271124699924e-06, "loss": 2.0787, "step": 3640 }, { "epoch": 1.2409679618268576, "grad_norm": 16.287596516751098, "learning_rate": 7.293509161090453e-06, "loss": 2.0401, "step": 3641 }, { "epoch": 1.2413087934560327, "grad_norm": 19.446975964759353, "learning_rate": 7.291746836702862e-06, "loss": 2.4434, "step": 3642 }, { "epoch": 1.2416496250852078, "grad_norm": 16.77775285898734, "learning_rate": 7.289984151814369e-06, "loss": 2.8362, "step": 3643 }, { "epoch": 1.2419904567143831, "grad_norm": 16.62440181717258, "learning_rate": 7.288221106702253e-06, "loss": 2.6221, "step": 3644 }, { "epoch": 1.2423312883435582, "grad_norm": 17.34875144230253, "learning_rate": 7.2864577016438494e-06, "loss": 1.6827, "step": 3645 }, { "epoch": 1.2426721199727335, "grad_norm": 16.193107764629545, "learning_rate": 7.284693936916547e-06, "loss": 2.0995, "step": 3646 }, { "epoch": 1.2430129516019086, "grad_norm": 17.238087072530522, "learning_rate": 7.2829298127977936e-06, "loss": 1.7588, "step": 3647 }, { "epoch": 1.243353783231084, "grad_norm": 11.847236587834873, "learning_rate": 7.281165329565093e-06, "loss": 2.1484, "step": 3648 }, { "epoch": 1.243694614860259, "grad_norm": 22.27607964369925, "learning_rate": 7.279400487496004e-06, "loss": 2.504, "step": 3649 }, { "epoch": 1.2440354464894343, "grad_norm": 24.64762202265775, "learning_rate": 7.277635286868145e-06, "loss": 2.4988, "step": 3650 }, { "epoch": 1.2443762781186094, "grad_norm": 18.37783823701687, "learning_rate": 7.2758697279591895e-06, "loss": 2.0012, "step": 3651 }, { "epoch": 1.2447171097477847, "grad_norm": 13.239947199227654, "learning_rate": 7.274103811046865e-06, "loss": 2.3486, "step": 3652 }, { "epoch": 1.2450579413769598, "grad_norm": 22.847529348998265, "learning_rate": 7.272337536408959e-06, "loss": 2.4851, "step": 3653 }, { "epoch": 1.2453987730061349, "grad_norm": 13.942043654602312, "learning_rate": 7.270570904323313e-06, "loss": 2.564, "step": 3654 }, { "epoch": 1.2457396046353102, "grad_norm": 20.820386011217337, "learning_rate": 7.268803915067825e-06, "loss": 2.6842, "step": 3655 }, { "epoch": 1.2460804362644853, "grad_norm": 24.097777647772805, "learning_rate": 7.267036568920449e-06, "loss": 1.983, "step": 3656 }, { "epoch": 1.2464212678936606, "grad_norm": 13.68577090247775, "learning_rate": 7.2652688661591965e-06, "loss": 1.5271, "step": 3657 }, { "epoch": 1.2467620995228357, "grad_norm": 15.951467681372325, "learning_rate": 7.263500807062134e-06, "loss": 2.3209, "step": 3658 }, { "epoch": 1.247102931152011, "grad_norm": 20.389797762689586, "learning_rate": 7.261732391907385e-06, "loss": 2.1197, "step": 3659 }, { "epoch": 1.247443762781186, "grad_norm": 18.989525721505032, "learning_rate": 7.259963620973125e-06, "loss": 2.4304, "step": 3660 }, { "epoch": 1.2477845944103612, "grad_norm": 21.74870228476153, "learning_rate": 7.258194494537592e-06, "loss": 2.0293, "step": 3661 }, { "epoch": 1.2481254260395365, "grad_norm": 21.422464289628643, "learning_rate": 7.256425012879077e-06, "loss": 2.0886, "step": 3662 }, { "epoch": 1.2484662576687118, "grad_norm": 20.153990719533265, "learning_rate": 7.254655176275925e-06, "loss": 2.0151, "step": 3663 }, { "epoch": 1.2488070892978869, "grad_norm": 19.83330937573755, "learning_rate": 7.252884985006536e-06, "loss": 2.0695, "step": 3664 }, { "epoch": 1.249147920927062, "grad_norm": 16.964472573661798, "learning_rate": 7.251114439349373e-06, "loss": 2.3528, "step": 3665 }, { "epoch": 1.2494887525562373, "grad_norm": 23.070960293333485, "learning_rate": 7.249343539582946e-06, "loss": 2.3885, "step": 3666 }, { "epoch": 1.2498295841854123, "grad_norm": 24.82480837973142, "learning_rate": 7.247572285985828e-06, "loss": 2.2924, "step": 3667 }, { "epoch": 1.2501704158145877, "grad_norm": 17.172035190634162, "learning_rate": 7.245800678836641e-06, "loss": 2.1755, "step": 3668 }, { "epoch": 1.2505112474437627, "grad_norm": 16.098259533528285, "learning_rate": 7.244028718414068e-06, "loss": 2.1169, "step": 3669 }, { "epoch": 1.250852079072938, "grad_norm": 15.1495515853279, "learning_rate": 7.242256404996843e-06, "loss": 2.8136, "step": 3670 }, { "epoch": 1.2511929107021131, "grad_norm": 15.40782236546987, "learning_rate": 7.2404837388637615e-06, "loss": 2.0665, "step": 3671 }, { "epoch": 1.2515337423312882, "grad_norm": 18.14346338438745, "learning_rate": 7.238710720293668e-06, "loss": 2.0689, "step": 3672 }, { "epoch": 1.2518745739604635, "grad_norm": 18.323794991317495, "learning_rate": 7.236937349565467e-06, "loss": 2.0104, "step": 3673 }, { "epoch": 1.2522154055896388, "grad_norm": 15.692228195390742, "learning_rate": 7.235163626958115e-06, "loss": 2.211, "step": 3674 }, { "epoch": 1.252556237218814, "grad_norm": 17.252851080542197, "learning_rate": 7.233389552750627e-06, "loss": 2.4453, "step": 3675 }, { "epoch": 1.252897068847989, "grad_norm": 14.00637277882569, "learning_rate": 7.231615127222073e-06, "loss": 2.206, "step": 3676 }, { "epoch": 1.2532379004771643, "grad_norm": 16.014570837461253, "learning_rate": 7.229840350651576e-06, "loss": 2.0001, "step": 3677 }, { "epoch": 1.2535787321063394, "grad_norm": 14.258182110252342, "learning_rate": 7.228065223318315e-06, "loss": 2.4256, "step": 3678 }, { "epoch": 1.2539195637355147, "grad_norm": 13.687875728895541, "learning_rate": 7.226289745501525e-06, "loss": 2.4321, "step": 3679 }, { "epoch": 1.2542603953646898, "grad_norm": 16.403374397464667, "learning_rate": 7.224513917480494e-06, "loss": 2.0139, "step": 3680 }, { "epoch": 1.2546012269938651, "grad_norm": 14.720103557377202, "learning_rate": 7.22273773953457e-06, "loss": 2.3864, "step": 3681 }, { "epoch": 1.2549420586230402, "grad_norm": 38.54492462279358, "learning_rate": 7.220961211943152e-06, "loss": 2.2495, "step": 3682 }, { "epoch": 1.2552828902522153, "grad_norm": 23.97868581794325, "learning_rate": 7.219184334985693e-06, "loss": 2.5508, "step": 3683 }, { "epoch": 1.2556237218813906, "grad_norm": 22.859679771810203, "learning_rate": 7.2174071089417055e-06, "loss": 1.872, "step": 3684 }, { "epoch": 1.255964553510566, "grad_norm": 20.794676308730185, "learning_rate": 7.215629534090751e-06, "loss": 2.7151, "step": 3685 }, { "epoch": 1.256305385139741, "grad_norm": 20.86996989047925, "learning_rate": 7.213851610712451e-06, "loss": 2.4518, "step": 3686 }, { "epoch": 1.256646216768916, "grad_norm": 16.44249677514919, "learning_rate": 7.21207333908648e-06, "loss": 2.6101, "step": 3687 }, { "epoch": 1.2569870483980914, "grad_norm": 14.76804563925919, "learning_rate": 7.210294719492568e-06, "loss": 1.9824, "step": 3688 }, { "epoch": 1.2573278800272665, "grad_norm": 17.93763763390976, "learning_rate": 7.208515752210498e-06, "loss": 2.5755, "step": 3689 }, { "epoch": 1.2576687116564418, "grad_norm": 11.328468724357327, "learning_rate": 7.206736437520108e-06, "loss": 2.2547, "step": 3690 }, { "epoch": 1.2580095432856169, "grad_norm": 15.539394648004325, "learning_rate": 7.204956775701291e-06, "loss": 2.3193, "step": 3691 }, { "epoch": 1.2583503749147922, "grad_norm": 14.197250277951442, "learning_rate": 7.2031767670339965e-06, "loss": 2.8538, "step": 3692 }, { "epoch": 1.2586912065439673, "grad_norm": 22.848376891239475, "learning_rate": 7.201396411798227e-06, "loss": 1.9927, "step": 3693 }, { "epoch": 1.2590320381731424, "grad_norm": 13.51183605747328, "learning_rate": 7.199615710274038e-06, "loss": 2.237, "step": 3694 }, { "epoch": 1.2593728698023177, "grad_norm": 16.20305595763514, "learning_rate": 7.197834662741541e-06, "loss": 2.4897, "step": 3695 }, { "epoch": 1.259713701431493, "grad_norm": 11.322045556683854, "learning_rate": 7.196053269480904e-06, "loss": 1.6763, "step": 3696 }, { "epoch": 1.260054533060668, "grad_norm": 13.643094697521581, "learning_rate": 7.194271530772345e-06, "loss": 2.0263, "step": 3697 }, { "epoch": 1.2603953646898431, "grad_norm": 17.3611405173959, "learning_rate": 7.192489446896138e-06, "loss": 2.0476, "step": 3698 }, { "epoch": 1.2607361963190185, "grad_norm": 20.94899664277064, "learning_rate": 7.190707018132614e-06, "loss": 2.9461, "step": 3699 }, { "epoch": 1.2610770279481935, "grad_norm": 15.575273299437612, "learning_rate": 7.188924244762155e-06, "loss": 2.35, "step": 3700 }, { "epoch": 1.2614178595773688, "grad_norm": 25.72149759538044, "learning_rate": 7.187141127065201e-06, "loss": 1.7777, "step": 3701 }, { "epoch": 1.261758691206544, "grad_norm": 17.950575339975554, "learning_rate": 7.185357665322239e-06, "loss": 1.8057, "step": 3702 }, { "epoch": 1.2620995228357192, "grad_norm": 13.587031926458616, "learning_rate": 7.183573859813817e-06, "loss": 1.9299, "step": 3703 }, { "epoch": 1.2624403544648943, "grad_norm": 13.791604680695215, "learning_rate": 7.181789710820536e-06, "loss": 2.3635, "step": 3704 }, { "epoch": 1.2627811860940694, "grad_norm": 16.140014145535208, "learning_rate": 7.180005218623048e-06, "loss": 1.7355, "step": 3705 }, { "epoch": 1.2631220177232447, "grad_norm": 15.963108292827416, "learning_rate": 7.178220383502059e-06, "loss": 2.1832, "step": 3706 }, { "epoch": 1.26346284935242, "grad_norm": 20.994247530587163, "learning_rate": 7.176435205738335e-06, "loss": 2.4604, "step": 3707 }, { "epoch": 1.2638036809815951, "grad_norm": 15.946137899438389, "learning_rate": 7.1746496856126865e-06, "loss": 2.2899, "step": 3708 }, { "epoch": 1.2641445126107702, "grad_norm": 13.64137519680237, "learning_rate": 7.172863823405988e-06, "loss": 2.5356, "step": 3709 }, { "epoch": 1.2644853442399455, "grad_norm": 17.51475938214446, "learning_rate": 7.17107761939916e-06, "loss": 2.0512, "step": 3710 }, { "epoch": 1.2648261758691206, "grad_norm": 17.512868223116282, "learning_rate": 7.169291073873179e-06, "loss": 2.1393, "step": 3711 }, { "epoch": 1.265167007498296, "grad_norm": 17.060912144559335, "learning_rate": 7.167504187109076e-06, "loss": 1.6684, "step": 3712 }, { "epoch": 1.265507839127471, "grad_norm": 26.214947038258945, "learning_rate": 7.165716959387939e-06, "loss": 2.347, "step": 3713 }, { "epoch": 1.2658486707566463, "grad_norm": 13.966110211556815, "learning_rate": 7.163929390990902e-06, "loss": 2.0988, "step": 3714 }, { "epoch": 1.2661895023858214, "grad_norm": 18.518786001706943, "learning_rate": 7.162141482199158e-06, "loss": 2.253, "step": 3715 }, { "epoch": 1.2665303340149965, "grad_norm": 12.1668961272498, "learning_rate": 7.1603532332939485e-06, "loss": 2.1297, "step": 3716 }, { "epoch": 1.2668711656441718, "grad_norm": 14.788341995781185, "learning_rate": 7.158564644556578e-06, "loss": 2.145, "step": 3717 }, { "epoch": 1.267211997273347, "grad_norm": 32.02556275925098, "learning_rate": 7.156775716268398e-06, "loss": 2.4607, "step": 3718 }, { "epoch": 1.2675528289025222, "grad_norm": 16.166922398182674, "learning_rate": 7.15498644871081e-06, "loss": 2.4897, "step": 3719 }, { "epoch": 1.2678936605316973, "grad_norm": 21.445102445737547, "learning_rate": 7.153196842165275e-06, "loss": 2.2605, "step": 3720 }, { "epoch": 1.2682344921608726, "grad_norm": 16.164061186165192, "learning_rate": 7.151406896913305e-06, "loss": 1.9331, "step": 3721 }, { "epoch": 1.2685753237900477, "grad_norm": 31.12364610588024, "learning_rate": 7.149616613236467e-06, "loss": 2.1843, "step": 3722 }, { "epoch": 1.268916155419223, "grad_norm": 22.818928331955632, "learning_rate": 7.147825991416378e-06, "loss": 1.4311, "step": 3723 }, { "epoch": 1.269256987048398, "grad_norm": 17.508669148512013, "learning_rate": 7.146035031734712e-06, "loss": 2.0645, "step": 3724 }, { "epoch": 1.2695978186775734, "grad_norm": 19.474090759446494, "learning_rate": 7.1442437344731905e-06, "loss": 2.2306, "step": 3725 }, { "epoch": 1.2699386503067485, "grad_norm": 18.269335355242923, "learning_rate": 7.142452099913595e-06, "loss": 2.0307, "step": 3726 }, { "epoch": 1.2702794819359235, "grad_norm": 22.22154946992752, "learning_rate": 7.140660128337756e-06, "loss": 2.344, "step": 3727 }, { "epoch": 1.2706203135650989, "grad_norm": 15.99404764224144, "learning_rate": 7.138867820027557e-06, "loss": 2.1198, "step": 3728 }, { "epoch": 1.2709611451942742, "grad_norm": 22.541173080196703, "learning_rate": 7.137075175264933e-06, "loss": 2.2798, "step": 3729 }, { "epoch": 1.2713019768234493, "grad_norm": 13.392917296373598, "learning_rate": 7.135282194331881e-06, "loss": 2.4277, "step": 3730 }, { "epoch": 1.2716428084526243, "grad_norm": 15.705109751428056, "learning_rate": 7.133488877510437e-06, "loss": 1.9805, "step": 3731 }, { "epoch": 1.2719836400817996, "grad_norm": 14.690096405002484, "learning_rate": 7.131695225082702e-06, "loss": 2.4047, "step": 3732 }, { "epoch": 1.2723244717109747, "grad_norm": 13.724640227952932, "learning_rate": 7.129901237330818e-06, "loss": 2.164, "step": 3733 }, { "epoch": 1.27266530334015, "grad_norm": 10.34029065124664, "learning_rate": 7.128106914536994e-06, "loss": 1.9271, "step": 3734 }, { "epoch": 1.2730061349693251, "grad_norm": 18.06973657312537, "learning_rate": 7.126312256983482e-06, "loss": 2.1255, "step": 3735 }, { "epoch": 1.2733469665985004, "grad_norm": 13.892712278341005, "learning_rate": 7.124517264952585e-06, "loss": 2.1176, "step": 3736 }, { "epoch": 1.2736877982276755, "grad_norm": 44.355496836618784, "learning_rate": 7.122721938726666e-06, "loss": 2.5669, "step": 3737 }, { "epoch": 1.2740286298568506, "grad_norm": 10.753398221079161, "learning_rate": 7.120926278588136e-06, "loss": 2.2778, "step": 3738 }, { "epoch": 1.274369461486026, "grad_norm": 16.288167855662188, "learning_rate": 7.11913028481946e-06, "loss": 2.3772, "step": 3739 }, { "epoch": 1.274710293115201, "grad_norm": 16.34578825036036, "learning_rate": 7.117333957703153e-06, "loss": 2.3134, "step": 3740 }, { "epoch": 1.2750511247443763, "grad_norm": 18.478408938875653, "learning_rate": 7.115537297521786e-06, "loss": 2.0647, "step": 3741 }, { "epoch": 1.2753919563735514, "grad_norm": 17.586263583059125, "learning_rate": 7.113740304557981e-06, "loss": 2.1819, "step": 3742 }, { "epoch": 1.2757327880027267, "grad_norm": 16.996301232399176, "learning_rate": 7.111942979094413e-06, "loss": 1.7532, "step": 3743 }, { "epoch": 1.2760736196319018, "grad_norm": 13.833686982485709, "learning_rate": 7.110145321413806e-06, "loss": 2.2939, "step": 3744 }, { "epoch": 1.276414451261077, "grad_norm": 28.710953585619997, "learning_rate": 7.10834733179894e-06, "loss": 2.4974, "step": 3745 }, { "epoch": 1.2767552828902522, "grad_norm": 16.178509682054422, "learning_rate": 7.106549010532645e-06, "loss": 2.7058, "step": 3746 }, { "epoch": 1.2770961145194275, "grad_norm": 17.450003349016598, "learning_rate": 7.104750357897807e-06, "loss": 1.8309, "step": 3747 }, { "epoch": 1.2774369461486026, "grad_norm": 16.863796153632872, "learning_rate": 7.102951374177358e-06, "loss": 2.4437, "step": 3748 }, { "epoch": 1.2777777777777777, "grad_norm": 17.874556975013288, "learning_rate": 7.101152059654289e-06, "loss": 2.8323, "step": 3749 }, { "epoch": 1.278118609406953, "grad_norm": 17.267761995059164, "learning_rate": 7.099352414611632e-06, "loss": 2.3078, "step": 3750 }, { "epoch": 1.278459441036128, "grad_norm": 19.40396564310511, "learning_rate": 7.097552439332485e-06, "loss": 2.5031, "step": 3751 }, { "epoch": 1.2788002726653034, "grad_norm": 13.082954656403249, "learning_rate": 7.0957521340999915e-06, "loss": 2.6615, "step": 3752 }, { "epoch": 1.2791411042944785, "grad_norm": 14.336417135367721, "learning_rate": 7.093951499197343e-06, "loss": 2.147, "step": 3753 }, { "epoch": 1.2794819359236538, "grad_norm": 16.58989622862361, "learning_rate": 7.092150534907788e-06, "loss": 2.4459, "step": 3754 }, { "epoch": 1.2798227675528289, "grad_norm": 49.52086014494663, "learning_rate": 7.090349241514626e-06, "loss": 2.8512, "step": 3755 }, { "epoch": 1.280163599182004, "grad_norm": 14.29664636275805, "learning_rate": 7.088547619301207e-06, "loss": 1.4556, "step": 3756 }, { "epoch": 1.2805044308111793, "grad_norm": 23.579285432048326, "learning_rate": 7.086745668550933e-06, "loss": 2.2616, "step": 3757 }, { "epoch": 1.2808452624403546, "grad_norm": 20.192980823659056, "learning_rate": 7.084943389547259e-06, "loss": 2.3728, "step": 3758 }, { "epoch": 1.2811860940695297, "grad_norm": 17.07722230485793, "learning_rate": 7.083140782573689e-06, "loss": 2.7283, "step": 3759 }, { "epoch": 1.2815269256987047, "grad_norm": 14.544329762573188, "learning_rate": 7.0813378479137826e-06, "loss": 2.1605, "step": 3760 }, { "epoch": 1.28186775732788, "grad_norm": 18.985501763131865, "learning_rate": 7.079534585851148e-06, "loss": 2.299, "step": 3761 }, { "epoch": 1.2822085889570551, "grad_norm": 16.11022061984173, "learning_rate": 7.077730996669444e-06, "loss": 2.3366, "step": 3762 }, { "epoch": 1.2825494205862304, "grad_norm": 16.390645666488037, "learning_rate": 7.0759270806523825e-06, "loss": 2.6201, "step": 3763 }, { "epoch": 1.2828902522154055, "grad_norm": 16.988516140041288, "learning_rate": 7.074122838083729e-06, "loss": 2.2052, "step": 3764 }, { "epoch": 1.2832310838445808, "grad_norm": 24.14292466984292, "learning_rate": 7.072318269247296e-06, "loss": 1.7218, "step": 3765 }, { "epoch": 1.283571915473756, "grad_norm": 26.556960851123417, "learning_rate": 7.0705133744269486e-06, "loss": 2.1515, "step": 3766 }, { "epoch": 1.283912747102931, "grad_norm": 33.996840165558986, "learning_rate": 7.068708153906606e-06, "loss": 2.2266, "step": 3767 }, { "epoch": 1.2842535787321063, "grad_norm": 15.01834847954008, "learning_rate": 7.066902607970236e-06, "loss": 2.2618, "step": 3768 }, { "epoch": 1.2845944103612816, "grad_norm": 25.759240342123856, "learning_rate": 7.065096736901858e-06, "loss": 2.0937, "step": 3769 }, { "epoch": 1.2849352419904567, "grad_norm": 22.256072098184465, "learning_rate": 7.063290540985542e-06, "loss": 2.4018, "step": 3770 }, { "epoch": 1.2852760736196318, "grad_norm": 15.752480682852333, "learning_rate": 7.06148402050541e-06, "loss": 2.4036, "step": 3771 }, { "epoch": 1.2856169052488071, "grad_norm": 20.17090155966045, "learning_rate": 7.0596771757456374e-06, "loss": 2.2025, "step": 3772 }, { "epoch": 1.2859577368779822, "grad_norm": 15.399228724624912, "learning_rate": 7.057870006990444e-06, "loss": 2.2929, "step": 3773 }, { "epoch": 1.2862985685071575, "grad_norm": 15.270486766229137, "learning_rate": 7.056062514524107e-06, "loss": 2.4827, "step": 3774 }, { "epoch": 1.2866394001363326, "grad_norm": 21.082439953456948, "learning_rate": 7.0542546986309525e-06, "loss": 2.5273, "step": 3775 }, { "epoch": 1.286980231765508, "grad_norm": 16.0760950361454, "learning_rate": 7.052446559595355e-06, "loss": 2.4812, "step": 3776 }, { "epoch": 1.287321063394683, "grad_norm": 27.059540784712492, "learning_rate": 7.050638097701744e-06, "loss": 2.3431, "step": 3777 }, { "epoch": 1.287661895023858, "grad_norm": 15.321601262344549, "learning_rate": 7.048829313234599e-06, "loss": 2.1457, "step": 3778 }, { "epoch": 1.2880027266530334, "grad_norm": 22.260117436393212, "learning_rate": 7.047020206478445e-06, "loss": 2.1484, "step": 3779 }, { "epoch": 1.2883435582822087, "grad_norm": 14.19641294874661, "learning_rate": 7.045210777717863e-06, "loss": 2.0489, "step": 3780 }, { "epoch": 1.2886843899113838, "grad_norm": 23.004512751203315, "learning_rate": 7.043401027237487e-06, "loss": 2.8292, "step": 3781 }, { "epoch": 1.2890252215405589, "grad_norm": 13.607049189246933, "learning_rate": 7.041590955321993e-06, "loss": 2.8227, "step": 3782 }, { "epoch": 1.2893660531697342, "grad_norm": 19.5666032634259, "learning_rate": 7.039780562256114e-06, "loss": 2.7048, "step": 3783 }, { "epoch": 1.2897068847989093, "grad_norm": 17.145176813623035, "learning_rate": 7.037969848324634e-06, "loss": 2.028, "step": 3784 }, { "epoch": 1.2900477164280846, "grad_norm": 10.761088024799015, "learning_rate": 7.0361588138123834e-06, "loss": 1.662, "step": 3785 }, { "epoch": 1.2903885480572597, "grad_norm": 11.002414673730952, "learning_rate": 7.034347459004248e-06, "loss": 2.0564, "step": 3786 }, { "epoch": 1.290729379686435, "grad_norm": 12.746279826635107, "learning_rate": 7.032535784185157e-06, "loss": 2.2679, "step": 3787 }, { "epoch": 1.29107021131561, "grad_norm": 21.962513130618053, "learning_rate": 7.030723789640096e-06, "loss": 2.0115, "step": 3788 }, { "epoch": 1.2914110429447851, "grad_norm": 20.403204844049817, "learning_rate": 7.0289114756541e-06, "loss": 2.211, "step": 3789 }, { "epoch": 1.2917518745739605, "grad_norm": 19.27932942532655, "learning_rate": 7.027098842512252e-06, "loss": 1.8851, "step": 3790 }, { "epoch": 1.2920927062031358, "grad_norm": 13.483887844269695, "learning_rate": 7.0252858904996865e-06, "loss": 2.0915, "step": 3791 }, { "epoch": 1.2924335378323109, "grad_norm": 20.544548244301815, "learning_rate": 7.023472619901588e-06, "loss": 1.6905, "step": 3792 }, { "epoch": 1.292774369461486, "grad_norm": 15.79545120136861, "learning_rate": 7.021659031003192e-06, "loss": 1.7279, "step": 3793 }, { "epoch": 1.2931152010906612, "grad_norm": 11.666670389612552, "learning_rate": 7.019845124089781e-06, "loss": 2.1689, "step": 3794 }, { "epoch": 1.2934560327198363, "grad_norm": 15.20685481589324, "learning_rate": 7.018030899446695e-06, "loss": 2.6791, "step": 3795 }, { "epoch": 1.2937968643490116, "grad_norm": 20.214502943483648, "learning_rate": 7.016216357359312e-06, "loss": 2.5583, "step": 3796 }, { "epoch": 1.2941376959781867, "grad_norm": 16.28502856956678, "learning_rate": 7.014401498113071e-06, "loss": 1.9494, "step": 3797 }, { "epoch": 1.294478527607362, "grad_norm": 15.76952442977511, "learning_rate": 7.0125863219934555e-06, "loss": 1.9297, "step": 3798 }, { "epoch": 1.2948193592365371, "grad_norm": 14.133704175332756, "learning_rate": 7.010770829285999e-06, "loss": 2.3135, "step": 3799 }, { "epoch": 1.2951601908657122, "grad_norm": 18.41931850714591, "learning_rate": 7.008955020276287e-06, "loss": 2.2711, "step": 3800 }, { "epoch": 1.2955010224948875, "grad_norm": 23.706394855297354, "learning_rate": 7.007138895249954e-06, "loss": 1.6989, "step": 3801 }, { "epoch": 1.2958418541240628, "grad_norm": 13.719301313900244, "learning_rate": 7.005322454492683e-06, "loss": 2.5631, "step": 3802 }, { "epoch": 1.296182685753238, "grad_norm": 13.736088366879637, "learning_rate": 7.003505698290206e-06, "loss": 2.1676, "step": 3803 }, { "epoch": 1.296523517382413, "grad_norm": 16.18381550836006, "learning_rate": 7.001688626928308e-06, "loss": 2.7626, "step": 3804 }, { "epoch": 1.2968643490115883, "grad_norm": 17.88112989367456, "learning_rate": 6.999871240692819e-06, "loss": 2.2959, "step": 3805 }, { "epoch": 1.2972051806407634, "grad_norm": 17.943608471140614, "learning_rate": 6.998053539869625e-06, "loss": 2.6957, "step": 3806 }, { "epoch": 1.2975460122699387, "grad_norm": 15.176219953897414, "learning_rate": 6.996235524744654e-06, "loss": 2.0043, "step": 3807 }, { "epoch": 1.2978868438991138, "grad_norm": 17.00877462941904, "learning_rate": 6.99441719560389e-06, "loss": 2.4033, "step": 3808 }, { "epoch": 1.298227675528289, "grad_norm": 13.562789337247633, "learning_rate": 6.992598552733361e-06, "loss": 2.1669, "step": 3809 }, { "epoch": 1.2985685071574642, "grad_norm": 17.701036969394373, "learning_rate": 6.990779596419146e-06, "loss": 2.4451, "step": 3810 }, { "epoch": 1.2989093387866393, "grad_norm": 31.158446010449282, "learning_rate": 6.988960326947377e-06, "loss": 1.8016, "step": 3811 }, { "epoch": 1.2992501704158146, "grad_norm": 18.069948939811734, "learning_rate": 6.987140744604233e-06, "loss": 2.4876, "step": 3812 }, { "epoch": 1.29959100204499, "grad_norm": 13.331018302460079, "learning_rate": 6.985320849675936e-06, "loss": 2.2038, "step": 3813 }, { "epoch": 1.299931833674165, "grad_norm": 23.814632441704408, "learning_rate": 6.983500642448768e-06, "loss": 2.5977, "step": 3814 }, { "epoch": 1.30027266530334, "grad_norm": 14.327957420922402, "learning_rate": 6.981680123209053e-06, "loss": 2.3641, "step": 3815 }, { "epoch": 1.3006134969325154, "grad_norm": 21.71344146825744, "learning_rate": 6.979859292243166e-06, "loss": 2.5209, "step": 3816 }, { "epoch": 1.3009543285616905, "grad_norm": 11.235556770709811, "learning_rate": 6.97803814983753e-06, "loss": 2.1399, "step": 3817 }, { "epoch": 1.3012951601908658, "grad_norm": 15.765563185230238, "learning_rate": 6.97621669627862e-06, "loss": 2.1947, "step": 3818 }, { "epoch": 1.3016359918200409, "grad_norm": 19.802488915710846, "learning_rate": 6.974394931852957e-06, "loss": 2.031, "step": 3819 }, { "epoch": 1.3019768234492162, "grad_norm": 16.113011389938585, "learning_rate": 6.9725728568471115e-06, "loss": 2.2338, "step": 3820 }, { "epoch": 1.3023176550783913, "grad_norm": 21.525374451225744, "learning_rate": 6.970750471547702e-06, "loss": 2.7755, "step": 3821 }, { "epoch": 1.3026584867075663, "grad_norm": 17.266303511026507, "learning_rate": 6.968927776241399e-06, "loss": 1.5499, "step": 3822 }, { "epoch": 1.3029993183367417, "grad_norm": 22.38979318558162, "learning_rate": 6.96710477121492e-06, "loss": 2.2827, "step": 3823 }, { "epoch": 1.303340149965917, "grad_norm": 12.726958361894487, "learning_rate": 6.965281456755029e-06, "loss": 2.7568, "step": 3824 }, { "epoch": 1.303680981595092, "grad_norm": 14.238078979057878, "learning_rate": 6.963457833148542e-06, "loss": 2.2191, "step": 3825 }, { "epoch": 1.3040218132242671, "grad_norm": 14.853074182453383, "learning_rate": 6.961633900682321e-06, "loss": 2.6203, "step": 3826 }, { "epoch": 1.3043626448534424, "grad_norm": 19.79831389407027, "learning_rate": 6.959809659643279e-06, "loss": 2.452, "step": 3827 }, { "epoch": 1.3047034764826175, "grad_norm": 14.531131930603108, "learning_rate": 6.957985110318375e-06, "loss": 2.3774, "step": 3828 }, { "epoch": 1.3050443081117928, "grad_norm": 22.75988063470874, "learning_rate": 6.956160252994621e-06, "loss": 2.6234, "step": 3829 }, { "epoch": 1.305385139740968, "grad_norm": 14.721229115063348, "learning_rate": 6.9543350879590695e-06, "loss": 2.0447, "step": 3830 }, { "epoch": 1.3057259713701432, "grad_norm": 17.766640867718724, "learning_rate": 6.952509615498831e-06, "loss": 1.9995, "step": 3831 }, { "epoch": 1.3060668029993183, "grad_norm": 41.907392634743424, "learning_rate": 6.950683835901058e-06, "loss": 3.4796, "step": 3832 }, { "epoch": 1.3064076346284934, "grad_norm": 18.76698499741493, "learning_rate": 6.9488577494529505e-06, "loss": 1.9274, "step": 3833 }, { "epoch": 1.3067484662576687, "grad_norm": 15.920612965141467, "learning_rate": 6.947031356441761e-06, "loss": 2.348, "step": 3834 }, { "epoch": 1.307089297886844, "grad_norm": 11.264950480488507, "learning_rate": 6.94520465715479e-06, "loss": 1.8341, "step": 3835 }, { "epoch": 1.3074301295160191, "grad_norm": 21.89454174662031, "learning_rate": 6.943377651879382e-06, "loss": 2.454, "step": 3836 }, { "epoch": 1.3077709611451942, "grad_norm": 16.342115785360992, "learning_rate": 6.941550340902934e-06, "loss": 2.3671, "step": 3837 }, { "epoch": 1.3081117927743695, "grad_norm": 15.893470486127335, "learning_rate": 6.9397227245128874e-06, "loss": 1.8579, "step": 3838 }, { "epoch": 1.3084526244035446, "grad_norm": 26.6255229480798, "learning_rate": 6.937894802996734e-06, "loss": 2.1495, "step": 3839 }, { "epoch": 1.30879345603272, "grad_norm": 17.23120689851566, "learning_rate": 6.9360665766420165e-06, "loss": 2.4746, "step": 3840 }, { "epoch": 1.309134287661895, "grad_norm": 21.53173103278389, "learning_rate": 6.9342380457363176e-06, "loss": 1.8712, "step": 3841 }, { "epoch": 1.3094751192910703, "grad_norm": 20.698300110930877, "learning_rate": 6.932409210567275e-06, "loss": 2.3424, "step": 3842 }, { "epoch": 1.3098159509202454, "grad_norm": 16.892418105218574, "learning_rate": 6.93058007142257e-06, "loss": 2.3896, "step": 3843 }, { "epoch": 1.3101567825494205, "grad_norm": 16.50291823044476, "learning_rate": 6.928750628589935e-06, "loss": 2.2649, "step": 3844 }, { "epoch": 1.3104976141785958, "grad_norm": 23.887986586962686, "learning_rate": 6.92692088235715e-06, "loss": 2.1136, "step": 3845 }, { "epoch": 1.310838445807771, "grad_norm": 31.893850825636118, "learning_rate": 6.925090833012038e-06, "loss": 2.435, "step": 3846 }, { "epoch": 1.3111792774369462, "grad_norm": 17.038578214236743, "learning_rate": 6.9232604808424734e-06, "loss": 2.9094, "step": 3847 }, { "epoch": 1.3115201090661213, "grad_norm": 36.44132066024481, "learning_rate": 6.921429826136381e-06, "loss": 2.1493, "step": 3848 }, { "epoch": 1.3118609406952966, "grad_norm": 16.874113622746687, "learning_rate": 6.919598869181728e-06, "loss": 2.4017, "step": 3849 }, { "epoch": 1.3122017723244717, "grad_norm": 13.961713399703555, "learning_rate": 6.917767610266532e-06, "loss": 1.8282, "step": 3850 }, { "epoch": 1.312542603953647, "grad_norm": 14.153486178986439, "learning_rate": 6.915936049678855e-06, "loss": 1.6796, "step": 3851 }, { "epoch": 1.312883435582822, "grad_norm": 14.110829660547475, "learning_rate": 6.9141041877068105e-06, "loss": 2.2185, "step": 3852 }, { "epoch": 1.3132242672119974, "grad_norm": 11.232971768337123, "learning_rate": 6.912272024638559e-06, "loss": 2.3297, "step": 3853 }, { "epoch": 1.3135650988411725, "grad_norm": 19.58429388575918, "learning_rate": 6.910439560762305e-06, "loss": 2.1426, "step": 3854 }, { "epoch": 1.3139059304703475, "grad_norm": 19.029083995641212, "learning_rate": 6.9086067963663035e-06, "loss": 2.3334, "step": 3855 }, { "epoch": 1.3142467620995228, "grad_norm": 15.097944336104568, "learning_rate": 6.9067737317388535e-06, "loss": 2.431, "step": 3856 }, { "epoch": 1.3145875937286982, "grad_norm": 15.291953220666416, "learning_rate": 6.904940367168308e-06, "loss": 1.8898, "step": 3857 }, { "epoch": 1.3149284253578732, "grad_norm": 15.036577290849394, "learning_rate": 6.903106702943058e-06, "loss": 2.1919, "step": 3858 }, { "epoch": 1.3152692569870483, "grad_norm": 15.696085941190859, "learning_rate": 6.901272739351548e-06, "loss": 2.1346, "step": 3859 }, { "epoch": 1.3156100886162236, "grad_norm": 42.334378276211574, "learning_rate": 6.899438476682268e-06, "loss": 2.5064, "step": 3860 }, { "epoch": 1.3159509202453987, "grad_norm": 17.722484695755906, "learning_rate": 6.897603915223753e-06, "loss": 2.113, "step": 3861 }, { "epoch": 1.316291751874574, "grad_norm": 18.79507572915995, "learning_rate": 6.8957690552645896e-06, "loss": 2.5326, "step": 3862 }, { "epoch": 1.3166325835037491, "grad_norm": 18.1943425112976, "learning_rate": 6.893933897093406e-06, "loss": 1.8083, "step": 3863 }, { "epoch": 1.3169734151329244, "grad_norm": 11.799467558981958, "learning_rate": 6.892098440998881e-06, "loss": 1.75, "step": 3864 }, { "epoch": 1.3173142467620995, "grad_norm": 16.679822131383403, "learning_rate": 6.890262687269741e-06, "loss": 2.2385, "step": 3865 }, { "epoch": 1.3176550783912746, "grad_norm": 14.598476483988199, "learning_rate": 6.8884266361947545e-06, "loss": 2.393, "step": 3866 }, { "epoch": 1.31799591002045, "grad_norm": 17.89356812516282, "learning_rate": 6.886590288062741e-06, "loss": 2.2009, "step": 3867 }, { "epoch": 1.3183367416496252, "grad_norm": 18.590437797510663, "learning_rate": 6.884753643162565e-06, "loss": 2.722, "step": 3868 }, { "epoch": 1.3186775732788003, "grad_norm": 21.528736081202418, "learning_rate": 6.882916701783138e-06, "loss": 3.1708, "step": 3869 }, { "epoch": 1.3190184049079754, "grad_norm": 18.839244312680087, "learning_rate": 6.8810794642134185e-06, "loss": 2.2638, "step": 3870 }, { "epoch": 1.3193592365371507, "grad_norm": 14.608862100968034, "learning_rate": 6.87924193074241e-06, "loss": 2.5404, "step": 3871 }, { "epoch": 1.3197000681663258, "grad_norm": 18.427000878562087, "learning_rate": 6.877404101659166e-06, "loss": 2.3884, "step": 3872 }, { "epoch": 1.320040899795501, "grad_norm": 22.065773912202683, "learning_rate": 6.875565977252783e-06, "loss": 2.5673, "step": 3873 }, { "epoch": 1.3203817314246762, "grad_norm": 19.576269174829772, "learning_rate": 6.873727557812406e-06, "loss": 2.815, "step": 3874 }, { "epoch": 1.3207225630538515, "grad_norm": 17.10120052163096, "learning_rate": 6.871888843627226e-06, "loss": 2.3887, "step": 3875 }, { "epoch": 1.3210633946830266, "grad_norm": 24.16216454444984, "learning_rate": 6.870049834986479e-06, "loss": 2.5343, "step": 3876 }, { "epoch": 1.3214042263122017, "grad_norm": 15.806096200698017, "learning_rate": 6.868210532179447e-06, "loss": 1.9626, "step": 3877 }, { "epoch": 1.321745057941377, "grad_norm": 18.135263125124396, "learning_rate": 6.866370935495464e-06, "loss": 2.2331, "step": 3878 }, { "epoch": 1.322085889570552, "grad_norm": 14.323540270727536, "learning_rate": 6.864531045223901e-06, "loss": 2.0516, "step": 3879 }, { "epoch": 1.3224267211997274, "grad_norm": 37.71207770076337, "learning_rate": 6.862690861654184e-06, "loss": 1.8953, "step": 3880 }, { "epoch": 1.3227675528289025, "grad_norm": 13.633724129407634, "learning_rate": 6.860850385075777e-06, "loss": 2.3467, "step": 3881 }, { "epoch": 1.3231083844580778, "grad_norm": 14.90965598982734, "learning_rate": 6.859009615778199e-06, "loss": 2.5551, "step": 3882 }, { "epoch": 1.3234492160872529, "grad_norm": 21.7211468237817, "learning_rate": 6.857168554051008e-06, "loss": 2.7332, "step": 3883 }, { "epoch": 1.3237900477164282, "grad_norm": 18.31080426131572, "learning_rate": 6.85532720018381e-06, "loss": 2.1016, "step": 3884 }, { "epoch": 1.3241308793456033, "grad_norm": 23.606419565618964, "learning_rate": 6.853485554466258e-06, "loss": 2.3942, "step": 3885 }, { "epoch": 1.3244717109747786, "grad_norm": 14.84780720228075, "learning_rate": 6.85164361718805e-06, "loss": 2.5824, "step": 3886 }, { "epoch": 1.3248125426039536, "grad_norm": 16.01953339890038, "learning_rate": 6.849801388638929e-06, "loss": 2.3101, "step": 3887 }, { "epoch": 1.3251533742331287, "grad_norm": 13.969837033547517, "learning_rate": 6.847958869108689e-06, "loss": 1.9326, "step": 3888 }, { "epoch": 1.325494205862304, "grad_norm": 22.151160404559853, "learning_rate": 6.846116058887159e-06, "loss": 2.2138, "step": 3889 }, { "epoch": 1.3258350374914791, "grad_norm": 14.47859385097851, "learning_rate": 6.8442729582642245e-06, "loss": 2.4827, "step": 3890 }, { "epoch": 1.3261758691206544, "grad_norm": 18.01067543605298, "learning_rate": 6.842429567529815e-06, "loss": 2.3365, "step": 3891 }, { "epoch": 1.3265167007498295, "grad_norm": 14.596166241583779, "learning_rate": 6.840585886973897e-06, "loss": 2.7574, "step": 3892 }, { "epoch": 1.3268575323790048, "grad_norm": 16.90890573980185, "learning_rate": 6.838741916886493e-06, "loss": 2.1978, "step": 3893 }, { "epoch": 1.32719836400818, "grad_norm": 17.75469843037621, "learning_rate": 6.836897657557666e-06, "loss": 2.2504, "step": 3894 }, { "epoch": 1.3275391956373552, "grad_norm": 16.41835900070921, "learning_rate": 6.835053109277525e-06, "loss": 2.1525, "step": 3895 }, { "epoch": 1.3278800272665303, "grad_norm": 22.960236610631313, "learning_rate": 6.833208272336227e-06, "loss": 2.7451, "step": 3896 }, { "epoch": 1.3282208588957056, "grad_norm": 21.40550883112957, "learning_rate": 6.831363147023968e-06, "loss": 2.1508, "step": 3897 }, { "epoch": 1.3285616905248807, "grad_norm": 15.090253184796694, "learning_rate": 6.829517733630995e-06, "loss": 2.3375, "step": 3898 }, { "epoch": 1.3289025221540558, "grad_norm": 18.086659181070058, "learning_rate": 6.827672032447602e-06, "loss": 2.0773, "step": 3899 }, { "epoch": 1.329243353783231, "grad_norm": 16.060954192303036, "learning_rate": 6.825826043764121e-06, "loss": 2.6167, "step": 3900 }, { "epoch": 1.3295841854124062, "grad_norm": 21.79962843725842, "learning_rate": 6.823979767870936e-06, "loss": 2.1114, "step": 3901 }, { "epoch": 1.3299250170415815, "grad_norm": 19.094688767103232, "learning_rate": 6.822133205058472e-06, "loss": 2.2896, "step": 3902 }, { "epoch": 1.3302658486707566, "grad_norm": 13.715316023411846, "learning_rate": 6.820286355617202e-06, "loss": 2.0012, "step": 3903 }, { "epoch": 1.330606680299932, "grad_norm": 14.649565002096471, "learning_rate": 6.818439219837642e-06, "loss": 2.2898, "step": 3904 }, { "epoch": 1.330947511929107, "grad_norm": 15.79462281622963, "learning_rate": 6.816591798010354e-06, "loss": 1.9373, "step": 3905 }, { "epoch": 1.331288343558282, "grad_norm": 16.34796310845031, "learning_rate": 6.814744090425945e-06, "loss": 2.3773, "step": 3906 }, { "epoch": 1.3316291751874574, "grad_norm": 18.138759245882046, "learning_rate": 6.812896097375065e-06, "loss": 2.6431, "step": 3907 }, { "epoch": 1.3319700068166327, "grad_norm": 16.57133826933695, "learning_rate": 6.811047819148413e-06, "loss": 2.0701, "step": 3908 }, { "epoch": 1.3323108384458078, "grad_norm": 14.277452310457255, "learning_rate": 6.8091992560367305e-06, "loss": 2.0092, "step": 3909 }, { "epoch": 1.3326516700749829, "grad_norm": 14.713774267725489, "learning_rate": 6.807350408330802e-06, "loss": 1.935, "step": 3910 }, { "epoch": 1.3329925017041582, "grad_norm": 17.264725490728086, "learning_rate": 6.80550127632146e-06, "loss": 2.4897, "step": 3911 }, { "epoch": 1.3333333333333333, "grad_norm": 19.393539640646576, "learning_rate": 6.80365186029958e-06, "loss": 2.111, "step": 3912 }, { "epoch": 1.3336741649625086, "grad_norm": 20.135367266973134, "learning_rate": 6.801802160556084e-06, "loss": 1.4603, "step": 3913 }, { "epoch": 1.3340149965916837, "grad_norm": 12.417195162624207, "learning_rate": 6.799952177381933e-06, "loss": 1.8347, "step": 3914 }, { "epoch": 1.334355828220859, "grad_norm": 14.254410002981151, "learning_rate": 6.798101911068139e-06, "loss": 2.3993, "step": 3915 }, { "epoch": 1.334696659850034, "grad_norm": 14.460864690519069, "learning_rate": 6.796251361905759e-06, "loss": 2.1682, "step": 3916 }, { "epoch": 1.3350374914792091, "grad_norm": 16.41066647033498, "learning_rate": 6.794400530185889e-06, "loss": 2.6489, "step": 3917 }, { "epoch": 1.3353783231083844, "grad_norm": 35.791416385958385, "learning_rate": 6.7925494161996705e-06, "loss": 3.0755, "step": 3918 }, { "epoch": 1.3357191547375598, "grad_norm": 22.279546770559367, "learning_rate": 6.790698020238294e-06, "loss": 2.2229, "step": 3919 }, { "epoch": 1.3360599863667348, "grad_norm": 22.73376290380986, "learning_rate": 6.788846342592991e-06, "loss": 1.4827, "step": 3920 }, { "epoch": 1.33640081799591, "grad_norm": 17.332685406293752, "learning_rate": 6.786994383555037e-06, "loss": 2.261, "step": 3921 }, { "epoch": 1.3367416496250852, "grad_norm": 15.355793195837107, "learning_rate": 6.785142143415754e-06, "loss": 2.3056, "step": 3922 }, { "epoch": 1.3370824812542603, "grad_norm": 22.700955769998025, "learning_rate": 6.783289622466503e-06, "loss": 2.3986, "step": 3923 }, { "epoch": 1.3374233128834356, "grad_norm": 17.820343585335078, "learning_rate": 6.781436820998697e-06, "loss": 2.7847, "step": 3924 }, { "epoch": 1.3377641445126107, "grad_norm": 22.028047698722347, "learning_rate": 6.779583739303789e-06, "loss": 2.0335, "step": 3925 }, { "epoch": 1.338104976141786, "grad_norm": 28.16750353305819, "learning_rate": 6.7777303776732725e-06, "loss": 1.6513, "step": 3926 }, { "epoch": 1.3384458077709611, "grad_norm": 29.432767640020277, "learning_rate": 6.775876736398692e-06, "loss": 2.673, "step": 3927 }, { "epoch": 1.3387866394001362, "grad_norm": 15.915401777218342, "learning_rate": 6.774022815771631e-06, "loss": 2.4791, "step": 3928 }, { "epoch": 1.3391274710293115, "grad_norm": 17.05504661342197, "learning_rate": 6.77216861608372e-06, "loss": 2.0134, "step": 3929 }, { "epoch": 1.3394683026584868, "grad_norm": 13.095722629978235, "learning_rate": 6.770314137626631e-06, "loss": 1.9494, "step": 3930 }, { "epoch": 1.339809134287662, "grad_norm": 15.702277118978483, "learning_rate": 6.768459380692079e-06, "loss": 2.5766, "step": 3931 }, { "epoch": 1.340149965916837, "grad_norm": 15.427214706424712, "learning_rate": 6.766604345571827e-06, "loss": 2.5998, "step": 3932 }, { "epoch": 1.3404907975460123, "grad_norm": 21.935174795316502, "learning_rate": 6.764749032557683e-06, "loss": 2.4382, "step": 3933 }, { "epoch": 1.3408316291751874, "grad_norm": 31.962066301241855, "learning_rate": 6.762893441941487e-06, "loss": 2.9641, "step": 3934 }, { "epoch": 1.3411724608043627, "grad_norm": 19.176846574776288, "learning_rate": 6.761037574015136e-06, "loss": 2.5409, "step": 3935 }, { "epoch": 1.3415132924335378, "grad_norm": 13.45491074271513, "learning_rate": 6.7591814290705645e-06, "loss": 2.0225, "step": 3936 }, { "epoch": 1.341854124062713, "grad_norm": 14.1703637418656, "learning_rate": 6.757325007399751e-06, "loss": 2.0603, "step": 3937 }, { "epoch": 1.3421949556918882, "grad_norm": 26.590833448257772, "learning_rate": 6.755468309294718e-06, "loss": 2.1169, "step": 3938 }, { "epoch": 1.3425357873210633, "grad_norm": 22.340231165867152, "learning_rate": 6.753611335047533e-06, "loss": 2.5642, "step": 3939 }, { "epoch": 1.3428766189502386, "grad_norm": 20.21293242497697, "learning_rate": 6.7517540849503025e-06, "loss": 2.3219, "step": 3940 }, { "epoch": 1.3432174505794139, "grad_norm": 16.041895635972786, "learning_rate": 6.749896559295183e-06, "loss": 2.5965, "step": 3941 }, { "epoch": 1.343558282208589, "grad_norm": 17.038333298579545, "learning_rate": 6.748038758374368e-06, "loss": 2.4708, "step": 3942 }, { "epoch": 1.343899113837764, "grad_norm": 19.392688586225063, "learning_rate": 6.746180682480098e-06, "loss": 2.1638, "step": 3943 }, { "epoch": 1.3442399454669394, "grad_norm": 28.262957873147588, "learning_rate": 6.744322331904655e-06, "loss": 2.4218, "step": 3944 }, { "epoch": 1.3445807770961145, "grad_norm": 15.507477541579416, "learning_rate": 6.7424637069403655e-06, "loss": 1.9211, "step": 3945 }, { "epoch": 1.3449216087252898, "grad_norm": 17.261159988348105, "learning_rate": 6.7406048078795985e-06, "loss": 2.2782, "step": 3946 }, { "epoch": 1.3452624403544649, "grad_norm": 16.982298690851398, "learning_rate": 6.738745635014767e-06, "loss": 2.6624, "step": 3947 }, { "epoch": 1.3456032719836402, "grad_norm": 20.186247990272243, "learning_rate": 6.736886188638324e-06, "loss": 2.3136, "step": 3948 }, { "epoch": 1.3459441036128152, "grad_norm": 19.272146544234623, "learning_rate": 6.735026469042771e-06, "loss": 2.5187, "step": 3949 }, { "epoch": 1.3462849352419903, "grad_norm": 14.42585529234335, "learning_rate": 6.7331664765206476e-06, "loss": 2.1949, "step": 3950 }, { "epoch": 1.3466257668711656, "grad_norm": 16.66782917254955, "learning_rate": 6.731306211364538e-06, "loss": 2.724, "step": 3951 }, { "epoch": 1.346966598500341, "grad_norm": 19.790437080407226, "learning_rate": 6.729445673867069e-06, "loss": 2.2639, "step": 3952 }, { "epoch": 1.347307430129516, "grad_norm": 14.128910958377219, "learning_rate": 6.727584864320911e-06, "loss": 2.3215, "step": 3953 }, { "epoch": 1.3476482617586911, "grad_norm": 16.375063237189135, "learning_rate": 6.725723783018776e-06, "loss": 2.1036, "step": 3954 }, { "epoch": 1.3479890933878664, "grad_norm": 18.948694573825097, "learning_rate": 6.723862430253421e-06, "loss": 2.558, "step": 3955 }, { "epoch": 1.3483299250170415, "grad_norm": 21.44175995027011, "learning_rate": 6.722000806317645e-06, "loss": 1.9551, "step": 3956 }, { "epoch": 1.3486707566462168, "grad_norm": 15.110184244331426, "learning_rate": 6.720138911504283e-06, "loss": 2.3549, "step": 3957 }, { "epoch": 1.349011588275392, "grad_norm": 2742.972761314463, "learning_rate": 6.718276746106227e-06, "loss": 2.0309, "step": 3958 }, { "epoch": 1.3493524199045672, "grad_norm": 16.26635429272673, "learning_rate": 6.716414310416397e-06, "loss": 2.2225, "step": 3959 }, { "epoch": 1.3496932515337423, "grad_norm": 14.302994101674273, "learning_rate": 6.714551604727763e-06, "loss": 2.281, "step": 3960 }, { "epoch": 1.3500340831629174, "grad_norm": 25.484301440705355, "learning_rate": 6.712688629333337e-06, "loss": 2.3777, "step": 3961 }, { "epoch": 1.3503749147920927, "grad_norm": 28.818765837355418, "learning_rate": 6.710825384526172e-06, "loss": 2.0625, "step": 3962 }, { "epoch": 1.350715746421268, "grad_norm": 18.786343937076815, "learning_rate": 6.7089618705993635e-06, "loss": 2.3281, "step": 3963 }, { "epoch": 1.351056578050443, "grad_norm": 15.584873240743278, "learning_rate": 6.7070980878460505e-06, "loss": 3.188, "step": 3964 }, { "epoch": 1.3513974096796182, "grad_norm": 20.971025709596386, "learning_rate": 6.70523403655941e-06, "loss": 1.5928, "step": 3965 }, { "epoch": 1.3517382413087935, "grad_norm": 36.43797068226448, "learning_rate": 6.7033697170326694e-06, "loss": 2.7645, "step": 3966 }, { "epoch": 1.3520790729379686, "grad_norm": 16.086409505250273, "learning_rate": 6.701505129559093e-06, "loss": 2.4911, "step": 3967 }, { "epoch": 1.352419904567144, "grad_norm": 19.901051839506227, "learning_rate": 6.699640274431985e-06, "loss": 2.8267, "step": 3968 }, { "epoch": 1.352760736196319, "grad_norm": 15.027273247288564, "learning_rate": 6.697775151944694e-06, "loss": 2.5942, "step": 3969 }, { "epoch": 1.3531015678254943, "grad_norm": 23.15630792601581, "learning_rate": 6.695909762390615e-06, "loss": 2.1881, "step": 3970 }, { "epoch": 1.3534423994546694, "grad_norm": 20.70328240891375, "learning_rate": 6.69404410606318e-06, "loss": 2.7542, "step": 3971 }, { "epoch": 1.3537832310838445, "grad_norm": 23.98601888556263, "learning_rate": 6.692178183255862e-06, "loss": 2.2346, "step": 3972 }, { "epoch": 1.3541240627130198, "grad_norm": 19.851549329193475, "learning_rate": 6.690311994262182e-06, "loss": 2.2817, "step": 3973 }, { "epoch": 1.354464894342195, "grad_norm": 11.376590899206189, "learning_rate": 6.688445539375693e-06, "loss": 2.0361, "step": 3974 }, { "epoch": 1.3548057259713702, "grad_norm": 20.623996979381854, "learning_rate": 6.686578818890003e-06, "loss": 2.0397, "step": 3975 }, { "epoch": 1.3551465576005453, "grad_norm": 19.366699725711303, "learning_rate": 6.6847118330987486e-06, "loss": 2.5811, "step": 3976 }, { "epoch": 1.3554873892297206, "grad_norm": 15.148130041279943, "learning_rate": 6.682844582295617e-06, "loss": 2.6696, "step": 3977 }, { "epoch": 1.3558282208588956, "grad_norm": 17.173217148497073, "learning_rate": 6.6809770667743335e-06, "loss": 1.4998, "step": 3978 }, { "epoch": 1.356169052488071, "grad_norm": 16.33245211118213, "learning_rate": 6.679109286828665e-06, "loss": 2.5324, "step": 3979 }, { "epoch": 1.356509884117246, "grad_norm": 20.59107767209744, "learning_rate": 6.6772412427524224e-06, "loss": 2.1547, "step": 3980 }, { "epoch": 1.3568507157464214, "grad_norm": 19.849801984701337, "learning_rate": 6.675372934839457e-06, "loss": 2.3262, "step": 3981 }, { "epoch": 1.3571915473755964, "grad_norm": 32.871637031805676, "learning_rate": 6.673504363383657e-06, "loss": 2.7198, "step": 3982 }, { "epoch": 1.3575323790047715, "grad_norm": 23.358786323113634, "learning_rate": 6.6716355286789595e-06, "loss": 2.9496, "step": 3983 }, { "epoch": 1.3578732106339468, "grad_norm": 14.108882060454794, "learning_rate": 6.669766431019341e-06, "loss": 1.4995, "step": 3984 }, { "epoch": 1.3582140422631221, "grad_norm": 16.868898199158863, "learning_rate": 6.667897070698815e-06, "loss": 2.2347, "step": 3985 }, { "epoch": 1.3585548738922972, "grad_norm": 16.681616599083593, "learning_rate": 6.666027448011438e-06, "loss": 2.6437, "step": 3986 }, { "epoch": 1.3588957055214723, "grad_norm": 18.816473097242017, "learning_rate": 6.664157563251313e-06, "loss": 2.8454, "step": 3987 }, { "epoch": 1.3592365371506476, "grad_norm": 19.894184538749062, "learning_rate": 6.66228741671258e-06, "loss": 1.868, "step": 3988 }, { "epoch": 1.3595773687798227, "grad_norm": 16.723783289051973, "learning_rate": 6.660417008689417e-06, "loss": 2.4035, "step": 3989 }, { "epoch": 1.359918200408998, "grad_norm": 12.590346941430214, "learning_rate": 6.65854633947605e-06, "loss": 2.4324, "step": 3990 }, { "epoch": 1.360259032038173, "grad_norm": 21.77209743162936, "learning_rate": 6.656675409366741e-06, "loss": 2.4762, "step": 3991 }, { "epoch": 1.3605998636673484, "grad_norm": 13.44267242681174, "learning_rate": 6.654804218655796e-06, "loss": 2.2917, "step": 3992 }, { "epoch": 1.3609406952965235, "grad_norm": 17.701274496013788, "learning_rate": 6.65293276763756e-06, "loss": 2.6656, "step": 3993 }, { "epoch": 1.3612815269256986, "grad_norm": 17.890984882122222, "learning_rate": 6.651061056606418e-06, "loss": 1.7633, "step": 3994 }, { "epoch": 1.361622358554874, "grad_norm": 19.18213471432306, "learning_rate": 6.649189085856801e-06, "loss": 2.1932, "step": 3995 }, { "epoch": 1.3619631901840492, "grad_norm": 14.686212209876405, "learning_rate": 6.647316855683175e-06, "loss": 1.9913, "step": 3996 }, { "epoch": 1.3623040218132243, "grad_norm": 18.68365824248722, "learning_rate": 6.64544436638005e-06, "loss": 1.912, "step": 3997 }, { "epoch": 1.3626448534423994, "grad_norm": 30.75097274977735, "learning_rate": 6.643571618241977e-06, "loss": 1.9378, "step": 3998 }, { "epoch": 1.3629856850715747, "grad_norm": 17.292137457572448, "learning_rate": 6.641698611563545e-06, "loss": 2.7281, "step": 3999 }, { "epoch": 1.3633265167007498, "grad_norm": 17.67170547869491, "learning_rate": 6.639825346639386e-06, "loss": 2.323, "step": 4000 }, { "epoch": 1.363667348329925, "grad_norm": 12.712052027131477, "learning_rate": 6.637951823764173e-06, "loss": 2.081, "step": 4001 }, { "epoch": 1.3640081799591002, "grad_norm": 68.13401727721093, "learning_rate": 6.636078043232616e-06, "loss": 2.0664, "step": 4002 }, { "epoch": 1.3643490115882755, "grad_norm": 20.217286208560058, "learning_rate": 6.634204005339473e-06, "loss": 1.9044, "step": 4003 }, { "epoch": 1.3646898432174506, "grad_norm": 14.782250362795386, "learning_rate": 6.632329710379532e-06, "loss": 2.5283, "step": 4004 }, { "epoch": 1.3650306748466257, "grad_norm": 16.375843687473143, "learning_rate": 6.630455158647631e-06, "loss": 2.3356, "step": 4005 }, { "epoch": 1.365371506475801, "grad_norm": 21.534898197466504, "learning_rate": 6.6285803504386415e-06, "loss": 1.8482, "step": 4006 }, { "epoch": 1.3657123381049763, "grad_norm": 14.761266922225166, "learning_rate": 6.626705286047482e-06, "loss": 1.8946, "step": 4007 }, { "epoch": 1.3660531697341514, "grad_norm": 16.50662413342029, "learning_rate": 6.624829965769103e-06, "loss": 2.1722, "step": 4008 }, { "epoch": 1.3663940013633264, "grad_norm": 19.251580403279423, "learning_rate": 6.622954389898505e-06, "loss": 2.7596, "step": 4009 }, { "epoch": 1.3667348329925018, "grad_norm": 12.056685919100195, "learning_rate": 6.621078558730719e-06, "loss": 2.3449, "step": 4010 }, { "epoch": 1.3670756646216768, "grad_norm": 29.862114602729264, "learning_rate": 6.619202472560823e-06, "loss": 2.1358, "step": 4011 }, { "epoch": 1.3674164962508522, "grad_norm": 20.6706124252245, "learning_rate": 6.617326131683932e-06, "loss": 1.9836, "step": 4012 }, { "epoch": 1.3677573278800272, "grad_norm": 11.97283836544764, "learning_rate": 6.615449536395203e-06, "loss": 2.0862, "step": 4013 }, { "epoch": 1.3680981595092025, "grad_norm": 16.762829392396153, "learning_rate": 6.613572686989831e-06, "loss": 2.3208, "step": 4014 }, { "epoch": 1.3684389911383776, "grad_norm": 15.465443861048868, "learning_rate": 6.611695583763053e-06, "loss": 1.9743, "step": 4015 }, { "epoch": 1.3687798227675527, "grad_norm": 11.35089050475214, "learning_rate": 6.6098182270101415e-06, "loss": 1.6009, "step": 4016 }, { "epoch": 1.369120654396728, "grad_norm": 22.18151261938851, "learning_rate": 6.607940617026416e-06, "loss": 1.9887, "step": 4017 }, { "epoch": 1.3694614860259033, "grad_norm": 18.843116936258617, "learning_rate": 6.606062754107232e-06, "loss": 2.4372, "step": 4018 }, { "epoch": 1.3698023176550784, "grad_norm": 16.628988007952554, "learning_rate": 6.604184638547981e-06, "loss": 1.9778, "step": 4019 }, { "epoch": 1.3701431492842535, "grad_norm": 14.94171855064862, "learning_rate": 6.602306270644101e-06, "loss": 2.3785, "step": 4020 }, { "epoch": 1.3704839809134288, "grad_norm": 13.593436378567938, "learning_rate": 6.6004276506910684e-06, "loss": 1.897, "step": 4021 }, { "epoch": 1.370824812542604, "grad_norm": 18.592813023255882, "learning_rate": 6.598548778984393e-06, "loss": 2.5464, "step": 4022 }, { "epoch": 1.3711656441717792, "grad_norm": 10.853595315923817, "learning_rate": 6.596669655819632e-06, "loss": 2.2601, "step": 4023 }, { "epoch": 1.3715064758009543, "grad_norm": 20.042366304423293, "learning_rate": 6.594790281492378e-06, "loss": 2.5854, "step": 4024 }, { "epoch": 1.3718473074301296, "grad_norm": 16.513404280483122, "learning_rate": 6.592910656298264e-06, "loss": 1.8443, "step": 4025 }, { "epoch": 1.3721881390593047, "grad_norm": 17.564937681026382, "learning_rate": 6.591030780532963e-06, "loss": 2.2527, "step": 4026 }, { "epoch": 1.3725289706884798, "grad_norm": 29.576600324453846, "learning_rate": 6.589150654492187e-06, "loss": 2.2793, "step": 4027 }, { "epoch": 1.372869802317655, "grad_norm": 16.38545823734742, "learning_rate": 6.587270278471687e-06, "loss": 2.4684, "step": 4028 }, { "epoch": 1.3732106339468302, "grad_norm": 15.6548697439115, "learning_rate": 6.585389652767253e-06, "loss": 2.9369, "step": 4029 }, { "epoch": 1.3735514655760055, "grad_norm": 14.032668236161104, "learning_rate": 6.5835087776747165e-06, "loss": 2.5064, "step": 4030 }, { "epoch": 1.3738922972051806, "grad_norm": 17.14349040280224, "learning_rate": 6.581627653489945e-06, "loss": 1.3395, "step": 4031 }, { "epoch": 1.3742331288343559, "grad_norm": 17.82702243603661, "learning_rate": 6.5797462805088475e-06, "loss": 2.2301, "step": 4032 }, { "epoch": 1.374573960463531, "grad_norm": 18.486420089066666, "learning_rate": 6.577864659027371e-06, "loss": 2.3194, "step": 4033 }, { "epoch": 1.3749147920927063, "grad_norm": 20.87687558313877, "learning_rate": 6.575982789341503e-06, "loss": 2.3011, "step": 4034 }, { "epoch": 1.3752556237218814, "grad_norm": 13.728471907029181, "learning_rate": 6.57410067174727e-06, "loss": 2.0545, "step": 4035 }, { "epoch": 1.3755964553510567, "grad_norm": 17.448253526879572, "learning_rate": 6.5722183065407345e-06, "loss": 2.4056, "step": 4036 }, { "epoch": 1.3759372869802318, "grad_norm": 25.150875228570023, "learning_rate": 6.5703356940179974e-06, "loss": 2.3097, "step": 4037 }, { "epoch": 1.3762781186094069, "grad_norm": 15.127248435831987, "learning_rate": 6.568452834475209e-06, "loss": 2.7579, "step": 4038 }, { "epoch": 1.3766189502385822, "grad_norm": 19.238802074282056, "learning_rate": 6.566569728208544e-06, "loss": 2.7834, "step": 4039 }, { "epoch": 1.3769597818677572, "grad_norm": 14.85067762157095, "learning_rate": 6.564686375514226e-06, "loss": 1.9136, "step": 4040 }, { "epoch": 1.3773006134969326, "grad_norm": 17.02647148358015, "learning_rate": 6.562802776688511e-06, "loss": 2.1712, "step": 4041 }, { "epoch": 1.3776414451261076, "grad_norm": 13.08960960764839, "learning_rate": 6.560918932027698e-06, "loss": 2.2136, "step": 4042 }, { "epoch": 1.377982276755283, "grad_norm": 16.24222422853547, "learning_rate": 6.5590348418281245e-06, "loss": 2.0946, "step": 4043 }, { "epoch": 1.378323108384458, "grad_norm": 18.285014953506455, "learning_rate": 6.557150506386163e-06, "loss": 2.1049, "step": 4044 }, { "epoch": 1.3786639400136331, "grad_norm": 13.814695341223445, "learning_rate": 6.555265925998229e-06, "loss": 1.9096, "step": 4045 }, { "epoch": 1.3790047716428084, "grad_norm": 13.02718234434643, "learning_rate": 6.553381100960774e-06, "loss": 2.119, "step": 4046 }, { "epoch": 1.3793456032719837, "grad_norm": 23.033395614848438, "learning_rate": 6.5514960315702865e-06, "loss": 2.4611, "step": 4047 }, { "epoch": 1.3796864349011588, "grad_norm": 21.55558739668274, "learning_rate": 6.549610718123298e-06, "loss": 1.5679, "step": 4048 }, { "epoch": 1.380027266530334, "grad_norm": 11.811051260671974, "learning_rate": 6.547725160916375e-06, "loss": 2.0442, "step": 4049 }, { "epoch": 1.3803680981595092, "grad_norm": 23.316635812133164, "learning_rate": 6.545839360246124e-06, "loss": 2.1392, "step": 4050 }, { "epoch": 1.3807089297886843, "grad_norm": 14.994687417617131, "learning_rate": 6.543953316409186e-06, "loss": 2.2499, "step": 4051 }, { "epoch": 1.3810497614178596, "grad_norm": 19.089471706555113, "learning_rate": 6.542067029702248e-06, "loss": 2.325, "step": 4052 }, { "epoch": 1.3813905930470347, "grad_norm": 15.07025300026965, "learning_rate": 6.540180500422025e-06, "loss": 2.3912, "step": 4053 }, { "epoch": 1.38173142467621, "grad_norm": 18.368759588493422, "learning_rate": 6.538293728865278e-06, "loss": 2.2036, "step": 4054 }, { "epoch": 1.382072256305385, "grad_norm": 416.18742037813195, "learning_rate": 6.536406715328807e-06, "loss": 2.4325, "step": 4055 }, { "epoch": 1.3824130879345602, "grad_norm": 23.636985591705283, "learning_rate": 6.534519460109441e-06, "loss": 2.5124, "step": 4056 }, { "epoch": 1.3827539195637355, "grad_norm": 14.717799705904612, "learning_rate": 6.5326319635040565e-06, "loss": 2.3464, "step": 4057 }, { "epoch": 1.3830947511929108, "grad_norm": 16.18448222390933, "learning_rate": 6.530744225809562e-06, "loss": 2.2514, "step": 4058 }, { "epoch": 1.383435582822086, "grad_norm": 17.816015744328855, "learning_rate": 6.528856247322908e-06, "loss": 2.0599, "step": 4059 }, { "epoch": 1.383776414451261, "grad_norm": 17.90812231147436, "learning_rate": 6.526968028341081e-06, "loss": 1.8762, "step": 4060 }, { "epoch": 1.3841172460804363, "grad_norm": 19.74809377383867, "learning_rate": 6.5250795691611034e-06, "loss": 2.1113, "step": 4061 }, { "epoch": 1.3844580777096114, "grad_norm": 15.059273287793626, "learning_rate": 6.5231908700800385e-06, "loss": 2.4504, "step": 4062 }, { "epoch": 1.3847989093387867, "grad_norm": 17.00077566657081, "learning_rate": 6.521301931394987e-06, "loss": 2.382, "step": 4063 }, { "epoch": 1.3851397409679618, "grad_norm": 14.740718791593597, "learning_rate": 6.519412753403085e-06, "loss": 1.8498, "step": 4064 }, { "epoch": 1.385480572597137, "grad_norm": 20.584209527588705, "learning_rate": 6.517523336401508e-06, "loss": 1.9443, "step": 4065 }, { "epoch": 1.3858214042263122, "grad_norm": 25.597056907611815, "learning_rate": 6.51563368068747e-06, "loss": 2.4438, "step": 4066 }, { "epoch": 1.3861622358554873, "grad_norm": 16.961414728238708, "learning_rate": 6.513743786558221e-06, "loss": 2.2819, "step": 4067 }, { "epoch": 1.3865030674846626, "grad_norm": 17.1046155410895, "learning_rate": 6.5118536543110475e-06, "loss": 2.5748, "step": 4068 }, { "epoch": 1.3868438991138379, "grad_norm": 20.39730259755839, "learning_rate": 6.509963284243278e-06, "loss": 1.798, "step": 4069 }, { "epoch": 1.387184730743013, "grad_norm": 13.157044170022662, "learning_rate": 6.508072676652272e-06, "loss": 2.152, "step": 4070 }, { "epoch": 1.387525562372188, "grad_norm": 9.977540315981646, "learning_rate": 6.506181831835429e-06, "loss": 1.7747, "step": 4071 }, { "epoch": 1.3878663940013634, "grad_norm": 51.838248201196144, "learning_rate": 6.50429075009019e-06, "loss": 2.3495, "step": 4072 }, { "epoch": 1.3882072256305384, "grad_norm": 16.006541506248055, "learning_rate": 6.502399431714027e-06, "loss": 2.174, "step": 4073 }, { "epoch": 1.3885480572597138, "grad_norm": 22.3515317539378, "learning_rate": 6.500507877004454e-06, "loss": 2.2558, "step": 4074 }, { "epoch": 1.3888888888888888, "grad_norm": 16.676019688673847, "learning_rate": 6.498616086259017e-06, "loss": 2.6179, "step": 4075 }, { "epoch": 1.3892297205180641, "grad_norm": 12.160248574830206, "learning_rate": 6.496724059775304e-06, "loss": 2.2462, "step": 4076 }, { "epoch": 1.3895705521472392, "grad_norm": 19.89994411306739, "learning_rate": 6.49483179785094e-06, "loss": 1.9553, "step": 4077 }, { "epoch": 1.3899113837764143, "grad_norm": 22.876641473526966, "learning_rate": 6.492939300783582e-06, "loss": 2.2315, "step": 4078 }, { "epoch": 1.3902522154055896, "grad_norm": 14.991333300072895, "learning_rate": 6.491046568870929e-06, "loss": 2.5941, "step": 4079 }, { "epoch": 1.390593047034765, "grad_norm": 15.914751322769293, "learning_rate": 6.489153602410716e-06, "loss": 2.7665, "step": 4080 }, { "epoch": 1.39093387866394, "grad_norm": 15.681191988725045, "learning_rate": 6.487260401700713e-06, "loss": 2.4018, "step": 4081 }, { "epoch": 1.3912747102931151, "grad_norm": 15.123392258219752, "learning_rate": 6.485366967038729e-06, "loss": 2.5182, "step": 4082 }, { "epoch": 1.3916155419222904, "grad_norm": 19.742107821568283, "learning_rate": 6.4834732987226065e-06, "loss": 2.185, "step": 4083 }, { "epoch": 1.3919563735514655, "grad_norm": 14.659445679105813, "learning_rate": 6.4815793970502295e-06, "loss": 1.7686, "step": 4084 }, { "epoch": 1.3922972051806408, "grad_norm": 21.95513690617308, "learning_rate": 6.479685262319516e-06, "loss": 2.5647, "step": 4085 }, { "epoch": 1.392638036809816, "grad_norm": 23.005480644990797, "learning_rate": 6.477790894828422e-06, "loss": 2.4519, "step": 4086 }, { "epoch": 1.3929788684389912, "grad_norm": 25.810367871797613, "learning_rate": 6.475896294874935e-06, "loss": 1.7914, "step": 4087 }, { "epoch": 1.3933197000681663, "grad_norm": 13.445514289278618, "learning_rate": 6.474001462757087e-06, "loss": 2.4808, "step": 4088 }, { "epoch": 1.3936605316973414, "grad_norm": 12.119944412943243, "learning_rate": 6.4721063987729395e-06, "loss": 1.9577, "step": 4089 }, { "epoch": 1.3940013633265167, "grad_norm": 19.149250303003743, "learning_rate": 6.470211103220596e-06, "loss": 2.6152, "step": 4090 }, { "epoch": 1.394342194955692, "grad_norm": 15.937716949155401, "learning_rate": 6.4683155763981946e-06, "loss": 2.748, "step": 4091 }, { "epoch": 1.394683026584867, "grad_norm": 13.322521154554362, "learning_rate": 6.4664198186039075e-06, "loss": 2.3051, "step": 4092 }, { "epoch": 1.3950238582140422, "grad_norm": 34.771514194464906, "learning_rate": 6.4645238301359445e-06, "loss": 1.9937, "step": 4093 }, { "epoch": 1.3953646898432175, "grad_norm": 14.464032841418115, "learning_rate": 6.462627611292556e-06, "loss": 2.4611, "step": 4094 }, { "epoch": 1.3957055214723926, "grad_norm": 12.120505301708688, "learning_rate": 6.4607311623720186e-06, "loss": 1.5425, "step": 4095 }, { "epoch": 1.3960463531015679, "grad_norm": 15.231063034211667, "learning_rate": 6.458834483672655e-06, "loss": 2.3392, "step": 4096 }, { "epoch": 1.396387184730743, "grad_norm": 29.135546116577828, "learning_rate": 6.456937575492821e-06, "loss": 2.8971, "step": 4097 }, { "epoch": 1.3967280163599183, "grad_norm": 17.201598008768457, "learning_rate": 6.455040438130906e-06, "loss": 2.7372, "step": 4098 }, { "epoch": 1.3970688479890934, "grad_norm": 14.106023149445445, "learning_rate": 6.453143071885337e-06, "loss": 2.5583, "step": 4099 }, { "epoch": 1.3974096796182685, "grad_norm": 13.251687855890035, "learning_rate": 6.451245477054579e-06, "loss": 2.0171, "step": 4100 }, { "epoch": 1.3977505112474438, "grad_norm": 17.61178936693371, "learning_rate": 6.449347653937131e-06, "loss": 2.3619, "step": 4101 }, { "epoch": 1.398091342876619, "grad_norm": 18.694930775981494, "learning_rate": 6.447449602831527e-06, "loss": 2.5008, "step": 4102 }, { "epoch": 1.3984321745057942, "grad_norm": 19.209663012577874, "learning_rate": 6.44555132403634e-06, "loss": 2.062, "step": 4103 }, { "epoch": 1.3987730061349692, "grad_norm": 18.24571456352589, "learning_rate": 6.443652817850173e-06, "loss": 2.5617, "step": 4104 }, { "epoch": 1.3991138377641446, "grad_norm": 14.06117034829641, "learning_rate": 6.441754084571672e-06, "loss": 2.27, "step": 4105 }, { "epoch": 1.3994546693933196, "grad_norm": 18.658003717348084, "learning_rate": 6.439855124499514e-06, "loss": 2.2428, "step": 4106 }, { "epoch": 1.399795501022495, "grad_norm": 16.510613573780113, "learning_rate": 6.437955937932413e-06, "loss": 1.8777, "step": 4107 }, { "epoch": 1.40013633265167, "grad_norm": 15.16941048725686, "learning_rate": 6.4360565251691184e-06, "loss": 1.909, "step": 4108 }, { "epoch": 1.4004771642808453, "grad_norm": 14.247384478164912, "learning_rate": 6.434156886508417e-06, "loss": 2.4311, "step": 4109 }, { "epoch": 1.4008179959100204, "grad_norm": 20.163006926665293, "learning_rate": 6.432257022249128e-06, "loss": 2.3731, "step": 4110 }, { "epoch": 1.4011588275391955, "grad_norm": 12.611694157383754, "learning_rate": 6.430356932690109e-06, "loss": 2.2791, "step": 4111 }, { "epoch": 1.4014996591683708, "grad_norm": 12.608135200737784, "learning_rate": 6.428456618130248e-06, "loss": 2.0172, "step": 4112 }, { "epoch": 1.4018404907975461, "grad_norm": 12.1409340760151, "learning_rate": 6.426556078868475e-06, "loss": 2.4051, "step": 4113 }, { "epoch": 1.4021813224267212, "grad_norm": 17.848624619061592, "learning_rate": 6.424655315203754e-06, "loss": 2.1296, "step": 4114 }, { "epoch": 1.4025221540558963, "grad_norm": 16.416457531910574, "learning_rate": 6.422754327435079e-06, "loss": 2.4566, "step": 4115 }, { "epoch": 1.4028629856850716, "grad_norm": 18.817352908618325, "learning_rate": 6.420853115861485e-06, "loss": 2.4618, "step": 4116 }, { "epoch": 1.4032038173142467, "grad_norm": 23.18760941071446, "learning_rate": 6.418951680782039e-06, "loss": 2.5911, "step": 4117 }, { "epoch": 1.403544648943422, "grad_norm": 15.039375027779045, "learning_rate": 6.4170500224958464e-06, "loss": 2.3492, "step": 4118 }, { "epoch": 1.403885480572597, "grad_norm": 18.429429967019285, "learning_rate": 6.415148141302043e-06, "loss": 2.0229, "step": 4119 }, { "epoch": 1.4042263122017724, "grad_norm": 14.230769749092802, "learning_rate": 6.413246037499806e-06, "loss": 2.01, "step": 4120 }, { "epoch": 1.4045671438309475, "grad_norm": 23.54049759151644, "learning_rate": 6.411343711388341e-06, "loss": 2.2121, "step": 4121 }, { "epoch": 1.4049079754601226, "grad_norm": 15.64969096467556, "learning_rate": 6.409441163266889e-06, "loss": 2.141, "step": 4122 }, { "epoch": 1.405248807089298, "grad_norm": 19.55806493353418, "learning_rate": 6.4075383934347345e-06, "loss": 2.2184, "step": 4123 }, { "epoch": 1.4055896387184732, "grad_norm": 21.745485225788897, "learning_rate": 6.405635402191186e-06, "loss": 1.9559, "step": 4124 }, { "epoch": 1.4059304703476483, "grad_norm": 20.175399725427596, "learning_rate": 6.403732189835595e-06, "loss": 2.6058, "step": 4125 }, { "epoch": 1.4062713019768234, "grad_norm": 17.92500363332867, "learning_rate": 6.40182875666734e-06, "loss": 2.3318, "step": 4126 }, { "epoch": 1.4066121336059987, "grad_norm": 15.682507790664987, "learning_rate": 6.399925102985842e-06, "loss": 2.3658, "step": 4127 }, { "epoch": 1.4069529652351738, "grad_norm": 19.114881402399035, "learning_rate": 6.398021229090554e-06, "loss": 2.1565, "step": 4128 }, { "epoch": 1.407293796864349, "grad_norm": 16.004587300449412, "learning_rate": 6.3961171352809605e-06, "loss": 2.281, "step": 4129 }, { "epoch": 1.4076346284935242, "grad_norm": 12.614857262550869, "learning_rate": 6.394212821856583e-06, "loss": 1.9879, "step": 4130 }, { "epoch": 1.4079754601226995, "grad_norm": 13.41797626183804, "learning_rate": 6.39230828911698e-06, "loss": 2.3234, "step": 4131 }, { "epoch": 1.4083162917518746, "grad_norm": 13.342986940035779, "learning_rate": 6.39040353736174e-06, "loss": 2.2872, "step": 4132 }, { "epoch": 1.4086571233810496, "grad_norm": 14.750294524433418, "learning_rate": 6.388498566890488e-06, "loss": 2.2263, "step": 4133 }, { "epoch": 1.408997955010225, "grad_norm": 18.953844613403813, "learning_rate": 6.386593378002885e-06, "loss": 1.6709, "step": 4134 }, { "epoch": 1.4093387866394003, "grad_norm": 20.982031110468785, "learning_rate": 6.384687970998625e-06, "loss": 2.2389, "step": 4135 }, { "epoch": 1.4096796182685754, "grad_norm": 352.22498613594826, "learning_rate": 6.382782346177434e-06, "loss": 2.3374, "step": 4136 }, { "epoch": 1.4100204498977504, "grad_norm": 22.00672607503255, "learning_rate": 6.380876503839076e-06, "loss": 2.0881, "step": 4137 }, { "epoch": 1.4103612815269257, "grad_norm": 16.25052418380546, "learning_rate": 6.378970444283345e-06, "loss": 2.1543, "step": 4138 }, { "epoch": 1.4107021131561008, "grad_norm": 21.62533553118729, "learning_rate": 6.3770641678100765e-06, "loss": 2.2178, "step": 4139 }, { "epoch": 1.4110429447852761, "grad_norm": 13.759621322740271, "learning_rate": 6.375157674719131e-06, "loss": 2.5971, "step": 4140 }, { "epoch": 1.4113837764144512, "grad_norm": 14.579761079181587, "learning_rate": 6.373250965310409e-06, "loss": 2.4856, "step": 4141 }, { "epoch": 1.4117246080436265, "grad_norm": 14.074023293999941, "learning_rate": 6.371344039883845e-06, "loss": 1.9761, "step": 4142 }, { "epoch": 1.4120654396728016, "grad_norm": 22.726269742860328, "learning_rate": 6.369436898739403e-06, "loss": 2.0237, "step": 4143 }, { "epoch": 1.4124062713019767, "grad_norm": 22.528727466340133, "learning_rate": 6.367529542177085e-06, "loss": 2.9489, "step": 4144 }, { "epoch": 1.412747102931152, "grad_norm": 14.825812036375087, "learning_rate": 6.365621970496929e-06, "loss": 2.2372, "step": 4145 }, { "epoch": 1.4130879345603273, "grad_norm": 15.416948315380447, "learning_rate": 6.363714183998998e-06, "loss": 2.0254, "step": 4146 }, { "epoch": 1.4134287661895024, "grad_norm": 14.698314649298018, "learning_rate": 6.361806182983396e-06, "loss": 1.8583, "step": 4147 }, { "epoch": 1.4137695978186775, "grad_norm": 18.45917086961421, "learning_rate": 6.359897967750265e-06, "loss": 2.0, "step": 4148 }, { "epoch": 1.4141104294478528, "grad_norm": 29.24096146733855, "learning_rate": 6.357989538599767e-06, "loss": 2.4595, "step": 4149 }, { "epoch": 1.414451261077028, "grad_norm": 10.253272857884088, "learning_rate": 6.35608089583211e-06, "loss": 1.7902, "step": 4150 }, { "epoch": 1.4147920927062032, "grad_norm": 22.76363082770309, "learning_rate": 6.354172039747529e-06, "loss": 2.4769, "step": 4151 }, { "epoch": 1.4151329243353783, "grad_norm": 14.634767613237905, "learning_rate": 6.352262970646296e-06, "loss": 2.0706, "step": 4152 }, { "epoch": 1.4154737559645536, "grad_norm": 14.680772971891189, "learning_rate": 6.350353688828714e-06, "loss": 2.0739, "step": 4153 }, { "epoch": 1.4158145875937287, "grad_norm": 14.720155225670668, "learning_rate": 6.348444194595123e-06, "loss": 2.6389, "step": 4154 }, { "epoch": 1.4161554192229038, "grad_norm": 12.181347793025616, "learning_rate": 6.34653448824589e-06, "loss": 2.2509, "step": 4155 }, { "epoch": 1.416496250852079, "grad_norm": 17.994703425404, "learning_rate": 6.344624570081424e-06, "loss": 2.3743, "step": 4156 }, { "epoch": 1.4168370824812544, "grad_norm": 19.367981681632944, "learning_rate": 6.342714440402158e-06, "loss": 2.241, "step": 4157 }, { "epoch": 1.4171779141104295, "grad_norm": 24.063527378896463, "learning_rate": 6.340804099508567e-06, "loss": 2.4286, "step": 4158 }, { "epoch": 1.4175187457396046, "grad_norm": 28.305408740774908, "learning_rate": 6.3388935477011535e-06, "loss": 2.6028, "step": 4159 }, { "epoch": 1.4178595773687799, "grad_norm": 15.262134528043909, "learning_rate": 6.336982785280455e-06, "loss": 2.2715, "step": 4160 }, { "epoch": 1.418200408997955, "grad_norm": 16.111081824053684, "learning_rate": 6.335071812547042e-06, "loss": 1.7806, "step": 4161 }, { "epoch": 1.4185412406271303, "grad_norm": 35.480247532982006, "learning_rate": 6.33316062980152e-06, "loss": 2.0574, "step": 4162 }, { "epoch": 1.4188820722563054, "grad_norm": 19.60758558617339, "learning_rate": 6.3312492373445224e-06, "loss": 2.4616, "step": 4163 }, { "epoch": 1.4192229038854807, "grad_norm": 24.902592795990603, "learning_rate": 6.32933763547672e-06, "loss": 2.2607, "step": 4164 }, { "epoch": 1.4195637355146558, "grad_norm": 17.778563612495017, "learning_rate": 6.327425824498816e-06, "loss": 2.1846, "step": 4165 }, { "epoch": 1.4199045671438308, "grad_norm": 11.81758538025014, "learning_rate": 6.325513804711546e-06, "loss": 1.9197, "step": 4166 }, { "epoch": 1.4202453987730062, "grad_norm": 33.00935052970937, "learning_rate": 6.323601576415678e-06, "loss": 2.0023, "step": 4167 }, { "epoch": 1.4205862304021815, "grad_norm": 14.208665203697588, "learning_rate": 6.321689139912013e-06, "loss": 2.0178, "step": 4168 }, { "epoch": 1.4209270620313565, "grad_norm": 16.816914787292436, "learning_rate": 6.319776495501384e-06, "loss": 1.8852, "step": 4169 }, { "epoch": 1.4212678936605316, "grad_norm": 15.407702196022148, "learning_rate": 6.317863643484661e-06, "loss": 2.6873, "step": 4170 }, { "epoch": 1.421608725289707, "grad_norm": 14.719541047486363, "learning_rate": 6.315950584162739e-06, "loss": 1.6438, "step": 4171 }, { "epoch": 1.421949556918882, "grad_norm": 15.973866953295847, "learning_rate": 6.31403731783655e-06, "loss": 2.408, "step": 4172 }, { "epoch": 1.4222903885480573, "grad_norm": 17.527285050790386, "learning_rate": 6.312123844807063e-06, "loss": 2.5421, "step": 4173 }, { "epoch": 1.4226312201772324, "grad_norm": 17.50606732492775, "learning_rate": 6.310210165375271e-06, "loss": 2.2525, "step": 4174 }, { "epoch": 1.4229720518064077, "grad_norm": 15.138694469002937, "learning_rate": 6.308296279842204e-06, "loss": 2.6332, "step": 4175 }, { "epoch": 1.4233128834355828, "grad_norm": 10.92006462807346, "learning_rate": 6.306382188508924e-06, "loss": 2.3816, "step": 4176 }, { "epoch": 1.423653715064758, "grad_norm": 16.6847621628487, "learning_rate": 6.304467891676525e-06, "loss": 2.3162, "step": 4177 }, { "epoch": 1.4239945466939332, "grad_norm": 18.386561194723605, "learning_rate": 6.302553389646135e-06, "loss": 2.5523, "step": 4178 }, { "epoch": 1.4243353783231083, "grad_norm": 13.787999506991124, "learning_rate": 6.300638682718911e-06, "loss": 2.6583, "step": 4179 }, { "epoch": 1.4246762099522836, "grad_norm": 30.698395412153527, "learning_rate": 6.298723771196046e-06, "loss": 2.3897, "step": 4180 }, { "epoch": 1.4250170415814587, "grad_norm": 16.967694134526244, "learning_rate": 6.2968086553787586e-06, "loss": 2.3993, "step": 4181 }, { "epoch": 1.425357873210634, "grad_norm": 24.62544892765898, "learning_rate": 6.2948933355683105e-06, "loss": 2.3652, "step": 4182 }, { "epoch": 1.425698704839809, "grad_norm": 22.460790262252434, "learning_rate": 6.292977812065986e-06, "loss": 1.8015, "step": 4183 }, { "epoch": 1.4260395364689844, "grad_norm": 10.554610217745585, "learning_rate": 6.291062085173103e-06, "loss": 2.5285, "step": 4184 }, { "epoch": 1.4263803680981595, "grad_norm": 20.87489133084229, "learning_rate": 6.289146155191015e-06, "loss": 1.9355, "step": 4185 }, { "epoch": 1.4267211997273348, "grad_norm": 14.359916100258236, "learning_rate": 6.2872300224211045e-06, "loss": 2.2342, "step": 4186 }, { "epoch": 1.4270620313565099, "grad_norm": 14.163856174852588, "learning_rate": 6.285313687164789e-06, "loss": 1.656, "step": 4187 }, { "epoch": 1.427402862985685, "grad_norm": 20.08092368138605, "learning_rate": 6.283397149723514e-06, "loss": 2.6091, "step": 4188 }, { "epoch": 1.4277436946148603, "grad_norm": 16.85974387576498, "learning_rate": 6.281480410398755e-06, "loss": 2.333, "step": 4189 }, { "epoch": 1.4280845262440354, "grad_norm": 21.511297075794307, "learning_rate": 6.279563469492029e-06, "loss": 2.5258, "step": 4190 }, { "epoch": 1.4284253578732107, "grad_norm": 12.692910719790788, "learning_rate": 6.277646327304876e-06, "loss": 2.1101, "step": 4191 }, { "epoch": 1.4287661895023858, "grad_norm": 24.867981489883274, "learning_rate": 6.275728984138869e-06, "loss": 1.9843, "step": 4192 }, { "epoch": 1.429107021131561, "grad_norm": 25.95256324955016, "learning_rate": 6.2738114402956145e-06, "loss": 2.5161, "step": 4193 }, { "epoch": 1.4294478527607362, "grad_norm": 15.191864365946055, "learning_rate": 6.271893696076749e-06, "loss": 2.4124, "step": 4194 }, { "epoch": 1.4297886843899112, "grad_norm": 14.332128678671753, "learning_rate": 6.2699757517839425e-06, "loss": 1.7101, "step": 4195 }, { "epoch": 1.4301295160190866, "grad_norm": 28.87104246552696, "learning_rate": 6.268057607718895e-06, "loss": 2.4512, "step": 4196 }, { "epoch": 1.4304703476482619, "grad_norm": 18.251559084713737, "learning_rate": 6.266139264183336e-06, "loss": 2.158, "step": 4197 }, { "epoch": 1.430811179277437, "grad_norm": 13.43547482878226, "learning_rate": 6.264220721479033e-06, "loss": 2.475, "step": 4198 }, { "epoch": 1.431152010906612, "grad_norm": 19.84823892724352, "learning_rate": 6.262301979907778e-06, "loss": 2.691, "step": 4199 }, { "epoch": 1.4314928425357873, "grad_norm": 13.209139208067842, "learning_rate": 6.260383039771395e-06, "loss": 2.122, "step": 4200 }, { "epoch": 1.4318336741649624, "grad_norm": 13.331149417705998, "learning_rate": 6.258463901371743e-06, "loss": 2.3425, "step": 4201 }, { "epoch": 1.4321745057941377, "grad_norm": 14.860728690564288, "learning_rate": 6.256544565010711e-06, "loss": 1.9794, "step": 4202 }, { "epoch": 1.4325153374233128, "grad_norm": 17.35000519217022, "learning_rate": 6.254625030990216e-06, "loss": 1.4361, "step": 4203 }, { "epoch": 1.4328561690524881, "grad_norm": 15.692009710966701, "learning_rate": 6.252705299612209e-06, "loss": 2.5015, "step": 4204 }, { "epoch": 1.4331970006816632, "grad_norm": 20.874088560140365, "learning_rate": 6.250785371178671e-06, "loss": 1.75, "step": 4205 }, { "epoch": 1.4335378323108383, "grad_norm": 17.885714389417462, "learning_rate": 6.248865245991615e-06, "loss": 1.8058, "step": 4206 }, { "epoch": 1.4338786639400136, "grad_norm": 16.59138856263439, "learning_rate": 6.246944924353087e-06, "loss": 2.3612, "step": 4207 }, { "epoch": 1.434219495569189, "grad_norm": 26.265938065698933, "learning_rate": 6.245024406565157e-06, "loss": 3.1253, "step": 4208 }, { "epoch": 1.434560327198364, "grad_norm": 30.418837941984698, "learning_rate": 6.24310369292993e-06, "loss": 2.8319, "step": 4209 }, { "epoch": 1.434901158827539, "grad_norm": 15.492970370052923, "learning_rate": 6.241182783749545e-06, "loss": 2.4145, "step": 4210 }, { "epoch": 1.4352419904567144, "grad_norm": 14.654970623711524, "learning_rate": 6.239261679326167e-06, "loss": 2.386, "step": 4211 }, { "epoch": 1.4355828220858895, "grad_norm": 15.581792357055546, "learning_rate": 6.237340379961994e-06, "loss": 2.4502, "step": 4212 }, { "epoch": 1.4359236537150648, "grad_norm": 22.66362302585363, "learning_rate": 6.235418885959255e-06, "loss": 2.7681, "step": 4213 }, { "epoch": 1.43626448534424, "grad_norm": 13.330091029818776, "learning_rate": 6.233497197620206e-06, "loss": 1.9599, "step": 4214 }, { "epoch": 1.4366053169734152, "grad_norm": 15.905976949849602, "learning_rate": 6.2315753152471355e-06, "loss": 2.5152, "step": 4215 }, { "epoch": 1.4369461486025903, "grad_norm": 15.054554940467366, "learning_rate": 6.2296532391423694e-06, "loss": 1.906, "step": 4216 }, { "epoch": 1.4372869802317654, "grad_norm": 22.857127906898214, "learning_rate": 6.227730969608251e-06, "loss": 2.1621, "step": 4217 }, { "epoch": 1.4376278118609407, "grad_norm": 14.427961984621895, "learning_rate": 6.225808506947164e-06, "loss": 2.1973, "step": 4218 }, { "epoch": 1.437968643490116, "grad_norm": 15.03534402302923, "learning_rate": 6.22388585146152e-06, "loss": 2.1455, "step": 4219 }, { "epoch": 1.438309475119291, "grad_norm": 24.40634167255601, "learning_rate": 6.221963003453759e-06, "loss": 2.9244, "step": 4220 }, { "epoch": 1.4386503067484662, "grad_norm": 22.047401670869874, "learning_rate": 6.220039963226354e-06, "loss": 2.0966, "step": 4221 }, { "epoch": 1.4389911383776415, "grad_norm": 21.34540651239847, "learning_rate": 6.218116731081806e-06, "loss": 1.5897, "step": 4222 }, { "epoch": 1.4393319700068166, "grad_norm": 22.059410684659365, "learning_rate": 6.216193307322645e-06, "loss": 2.2273, "step": 4223 }, { "epoch": 1.4396728016359919, "grad_norm": 16.436266967787553, "learning_rate": 6.214269692251438e-06, "loss": 1.9336, "step": 4224 }, { "epoch": 1.440013633265167, "grad_norm": 15.98754581407864, "learning_rate": 6.212345886170774e-06, "loss": 2.0265, "step": 4225 }, { "epoch": 1.4403544648943423, "grad_norm": 23.0872174940283, "learning_rate": 6.210421889383276e-06, "loss": 2.2666, "step": 4226 }, { "epoch": 1.4406952965235174, "grad_norm": 16.52843861759327, "learning_rate": 6.208497702191595e-06, "loss": 2.8351, "step": 4227 }, { "epoch": 1.4410361281526924, "grad_norm": 22.298666109855876, "learning_rate": 6.206573324898416e-06, "loss": 2.2592, "step": 4228 }, { "epoch": 1.4413769597818678, "grad_norm": 14.978301648140484, "learning_rate": 6.2046487578064495e-06, "loss": 1.9295, "step": 4229 }, { "epoch": 1.441717791411043, "grad_norm": 17.90480972558633, "learning_rate": 6.202724001218439e-06, "loss": 2.1672, "step": 4230 }, { "epoch": 1.4420586230402181, "grad_norm": 12.585361350641351, "learning_rate": 6.200799055437153e-06, "loss": 2.2879, "step": 4231 }, { "epoch": 1.4423994546693932, "grad_norm": 20.109374456354946, "learning_rate": 6.198873920765397e-06, "loss": 2.4619, "step": 4232 }, { "epoch": 1.4427402862985685, "grad_norm": 13.080141725853899, "learning_rate": 6.196948597506002e-06, "loss": 2.164, "step": 4233 }, { "epoch": 1.4430811179277436, "grad_norm": 17.755071481631937, "learning_rate": 6.195023085961827e-06, "loss": 2.4733, "step": 4234 }, { "epoch": 1.443421949556919, "grad_norm": 16.867412611086863, "learning_rate": 6.1930973864357625e-06, "loss": 2.06, "step": 4235 }, { "epoch": 1.443762781186094, "grad_norm": 22.103999448089247, "learning_rate": 6.19117149923073e-06, "loss": 2.2865, "step": 4236 }, { "epoch": 1.4441036128152693, "grad_norm": 18.21101645243261, "learning_rate": 6.189245424649679e-06, "loss": 1.973, "step": 4237 }, { "epoch": 1.4444444444444444, "grad_norm": 15.2564358912888, "learning_rate": 6.18731916299559e-06, "loss": 2.0041, "step": 4238 }, { "epoch": 1.4447852760736195, "grad_norm": 17.879490826712384, "learning_rate": 6.1853927145714674e-06, "loss": 2.718, "step": 4239 }, { "epoch": 1.4451261077027948, "grad_norm": 20.174451687470043, "learning_rate": 6.1834660796803534e-06, "loss": 1.5841, "step": 4240 }, { "epoch": 1.4454669393319701, "grad_norm": 12.987564516108856, "learning_rate": 6.181539258625315e-06, "loss": 2.6677, "step": 4241 }, { "epoch": 1.4458077709611452, "grad_norm": 20.09110286861504, "learning_rate": 6.179612251709448e-06, "loss": 2.0094, "step": 4242 }, { "epoch": 1.4461486025903203, "grad_norm": 12.987812387415309, "learning_rate": 6.177685059235876e-06, "loss": 2.1286, "step": 4243 }, { "epoch": 1.4464894342194956, "grad_norm": 14.3545352644821, "learning_rate": 6.175757681507758e-06, "loss": 2.3371, "step": 4244 }, { "epoch": 1.4468302658486707, "grad_norm": 22.288155204113032, "learning_rate": 6.173830118828275e-06, "loss": 2.8501, "step": 4245 }, { "epoch": 1.447171097477846, "grad_norm": 29.64320304280237, "learning_rate": 6.171902371500642e-06, "loss": 2.8582, "step": 4246 }, { "epoch": 1.447511929107021, "grad_norm": 19.564205282650125, "learning_rate": 6.169974439828101e-06, "loss": 2.3193, "step": 4247 }, { "epoch": 1.4478527607361964, "grad_norm": 29.452910982062246, "learning_rate": 6.1680463241139225e-06, "loss": 2.0912, "step": 4248 }, { "epoch": 1.4481935923653715, "grad_norm": 17.1178976348185, "learning_rate": 6.166118024661407e-06, "loss": 2.25, "step": 4249 }, { "epoch": 1.4485344239945466, "grad_norm": 15.664826094723566, "learning_rate": 6.164189541773886e-06, "loss": 1.5707, "step": 4250 }, { "epoch": 1.4488752556237219, "grad_norm": 12.220811830922289, "learning_rate": 6.162260875754716e-06, "loss": 2.1177, "step": 4251 }, { "epoch": 1.4492160872528972, "grad_norm": 23.794005563244202, "learning_rate": 6.160332026907282e-06, "loss": 1.8313, "step": 4252 }, { "epoch": 1.4495569188820723, "grad_norm": 13.6298634651217, "learning_rate": 6.158402995535001e-06, "loss": 2.3747, "step": 4253 }, { "epoch": 1.4498977505112474, "grad_norm": 15.813918315891687, "learning_rate": 6.156473781941319e-06, "loss": 2.2417, "step": 4254 }, { "epoch": 1.4502385821404227, "grad_norm": 13.47904160091913, "learning_rate": 6.154544386429707e-06, "loss": 2.0697, "step": 4255 }, { "epoch": 1.4505794137695978, "grad_norm": 23.011362407020368, "learning_rate": 6.1526148093036656e-06, "loss": 2.2636, "step": 4256 }, { "epoch": 1.450920245398773, "grad_norm": 22.889794017241012, "learning_rate": 6.150685050866727e-06, "loss": 2.0932, "step": 4257 }, { "epoch": 1.4512610770279482, "grad_norm": 30.82330838810119, "learning_rate": 6.148755111422451e-06, "loss": 2.3957, "step": 4258 }, { "epoch": 1.4516019086571235, "grad_norm": 23.935686965115497, "learning_rate": 6.146824991274421e-06, "loss": 2.7841, "step": 4259 }, { "epoch": 1.4519427402862985, "grad_norm": 17.61780993992046, "learning_rate": 6.144894690726256e-06, "loss": 2.561, "step": 4260 }, { "epoch": 1.4522835719154736, "grad_norm": 242.80002039823913, "learning_rate": 6.142964210081597e-06, "loss": 2.0443, "step": 4261 }, { "epoch": 1.452624403544649, "grad_norm": 11.70289154237094, "learning_rate": 6.141033549644118e-06, "loss": 1.9113, "step": 4262 }, { "epoch": 1.4529652351738243, "grad_norm": 21.911882038558463, "learning_rate": 6.1391027097175205e-06, "loss": 2.6845, "step": 4263 }, { "epoch": 1.4533060668029993, "grad_norm": 16.62583260436707, "learning_rate": 6.1371716906055336e-06, "loss": 2.2245, "step": 4264 }, { "epoch": 1.4536468984321744, "grad_norm": 22.64149010667539, "learning_rate": 6.13524049261191e-06, "loss": 2.8715, "step": 4265 }, { "epoch": 1.4539877300613497, "grad_norm": 14.089539580086985, "learning_rate": 6.133309116040439e-06, "loss": 2.1364, "step": 4266 }, { "epoch": 1.4543285616905248, "grad_norm": 14.505852779115182, "learning_rate": 6.131377561194933e-06, "loss": 1.5217, "step": 4267 }, { "epoch": 1.4546693933197001, "grad_norm": 17.185176707948177, "learning_rate": 6.129445828379233e-06, "loss": 2.7124, "step": 4268 }, { "epoch": 1.4550102249488752, "grad_norm": 14.290284085140701, "learning_rate": 6.1275139178972075e-06, "loss": 2.3308, "step": 4269 }, { "epoch": 1.4553510565780505, "grad_norm": 19.096161598925374, "learning_rate": 6.125581830052755e-06, "loss": 1.9929, "step": 4270 }, { "epoch": 1.4556918882072256, "grad_norm": 18.12013683008587, "learning_rate": 6.123649565149799e-06, "loss": 2.4343, "step": 4271 }, { "epoch": 1.4560327198364007, "grad_norm": 13.038962764134952, "learning_rate": 6.1217171234922944e-06, "loss": 2.2847, "step": 4272 }, { "epoch": 1.456373551465576, "grad_norm": 17.360416312255598, "learning_rate": 6.1197845053842195e-06, "loss": 2.2635, "step": 4273 }, { "epoch": 1.4567143830947513, "grad_norm": 25.484879142860926, "learning_rate": 6.117851711129585e-06, "loss": 2.7283, "step": 4274 }, { "epoch": 1.4570552147239264, "grad_norm": 29.376893548359316, "learning_rate": 6.115918741032428e-06, "loss": 2.437, "step": 4275 }, { "epoch": 1.4573960463531015, "grad_norm": 16.16409466806491, "learning_rate": 6.113985595396809e-06, "loss": 2.2009, "step": 4276 }, { "epoch": 1.4577368779822768, "grad_norm": 13.56511507686784, "learning_rate": 6.112052274526823e-06, "loss": 2.1692, "step": 4277 }, { "epoch": 1.4580777096114519, "grad_norm": 13.535814029694661, "learning_rate": 6.110118778726586e-06, "loss": 2.0236, "step": 4278 }, { "epoch": 1.4584185412406272, "grad_norm": 25.073024018092053, "learning_rate": 6.108185108300245e-06, "loss": 1.8582, "step": 4279 }, { "epoch": 1.4587593728698023, "grad_norm": 12.572283803837419, "learning_rate": 6.106251263551977e-06, "loss": 1.88, "step": 4280 }, { "epoch": 1.4591002044989776, "grad_norm": 30.7971577453711, "learning_rate": 6.104317244785982e-06, "loss": 2.1154, "step": 4281 }, { "epoch": 1.4594410361281527, "grad_norm": 13.143771012195034, "learning_rate": 6.1023830523064855e-06, "loss": 2.2885, "step": 4282 }, { "epoch": 1.4597818677573278, "grad_norm": 14.586642640200965, "learning_rate": 6.100448686417749e-06, "loss": 2.3554, "step": 4283 }, { "epoch": 1.460122699386503, "grad_norm": 12.715610464070068, "learning_rate": 6.0985141474240525e-06, "loss": 2.215, "step": 4284 }, { "epoch": 1.4604635310156784, "grad_norm": 20.95063636163122, "learning_rate": 6.096579435629708e-06, "loss": 2.2391, "step": 4285 }, { "epoch": 1.4608043626448535, "grad_norm": 57.65964542070164, "learning_rate": 6.0946445513390525e-06, "loss": 1.8572, "step": 4286 }, { "epoch": 1.4611451942740286, "grad_norm": 14.78284054788824, "learning_rate": 6.092709494856453e-06, "loss": 2.1532, "step": 4287 }, { "epoch": 1.4614860259032039, "grad_norm": 11.99865197803567, "learning_rate": 6.0907742664862975e-06, "loss": 2.2437, "step": 4288 }, { "epoch": 1.461826857532379, "grad_norm": 21.329583554622452, "learning_rate": 6.088838866533011e-06, "loss": 2.1765, "step": 4289 }, { "epoch": 1.4621676891615543, "grad_norm": 11.701458568737111, "learning_rate": 6.086903295301033e-06, "loss": 1.8296, "step": 4290 }, { "epoch": 1.4625085207907293, "grad_norm": 45.92196539672999, "learning_rate": 6.084967553094843e-06, "loss": 1.7079, "step": 4291 }, { "epoch": 1.4628493524199047, "grad_norm": 10.392438969979086, "learning_rate": 6.083031640218937e-06, "loss": 2.0963, "step": 4292 }, { "epoch": 1.4631901840490797, "grad_norm": 17.247349808683282, "learning_rate": 6.081095556977842e-06, "loss": 2.5887, "step": 4293 }, { "epoch": 1.4635310156782548, "grad_norm": 19.810562764139124, "learning_rate": 6.079159303676112e-06, "loss": 2.334, "step": 4294 }, { "epoch": 1.4638718473074301, "grad_norm": 14.540947116198309, "learning_rate": 6.077222880618329e-06, "loss": 2.2917, "step": 4295 }, { "epoch": 1.4642126789366054, "grad_norm": 26.13807109684002, "learning_rate": 6.0752862881090965e-06, "loss": 2.25, "step": 4296 }, { "epoch": 1.4645535105657805, "grad_norm": 15.124923711554166, "learning_rate": 6.073349526453052e-06, "loss": 2.4132, "step": 4297 }, { "epoch": 1.4648943421949556, "grad_norm": 20.550593339355665, "learning_rate": 6.071412595954854e-06, "loss": 2.3937, "step": 4298 }, { "epoch": 1.465235173824131, "grad_norm": 18.914122404323038, "learning_rate": 6.069475496919189e-06, "loss": 2.3307, "step": 4299 }, { "epoch": 1.465576005453306, "grad_norm": 15.970941530060696, "learning_rate": 6.067538229650773e-06, "loss": 2.0849, "step": 4300 }, { "epoch": 1.4659168370824813, "grad_norm": 16.837603364078372, "learning_rate": 6.065600794454343e-06, "loss": 1.9744, "step": 4301 }, { "epoch": 1.4662576687116564, "grad_norm": 19.51989939916051, "learning_rate": 6.063663191634666e-06, "loss": 2.4512, "step": 4302 }, { "epoch": 1.4665985003408317, "grad_norm": 15.077583682172724, "learning_rate": 6.061725421496534e-06, "loss": 2.5204, "step": 4303 }, { "epoch": 1.4669393319700068, "grad_norm": 14.124409501399741, "learning_rate": 6.059787484344767e-06, "loss": 2.0642, "step": 4304 }, { "epoch": 1.467280163599182, "grad_norm": 19.38218886199291, "learning_rate": 6.05784938048421e-06, "loss": 2.2292, "step": 4305 }, { "epoch": 1.4676209952283572, "grad_norm": 16.299590382758964, "learning_rate": 6.0559111102197345e-06, "loss": 2.1577, "step": 4306 }, { "epoch": 1.4679618268575325, "grad_norm": 12.589899971029206, "learning_rate": 6.053972673856236e-06, "loss": 1.9149, "step": 4307 }, { "epoch": 1.4683026584867076, "grad_norm": 14.212808604878427, "learning_rate": 6.052034071698644e-06, "loss": 2.2989, "step": 4308 }, { "epoch": 1.4686434901158827, "grad_norm": 29.39616079600534, "learning_rate": 6.050095304051903e-06, "loss": 2.0846, "step": 4309 }, { "epoch": 1.468984321745058, "grad_norm": 20.69304951942108, "learning_rate": 6.04815637122099e-06, "loss": 2.5216, "step": 4310 }, { "epoch": 1.469325153374233, "grad_norm": 14.192350957687575, "learning_rate": 6.046217273510908e-06, "loss": 2.4299, "step": 4311 }, { "epoch": 1.4696659850034084, "grad_norm": 18.85416948409746, "learning_rate": 6.044278011226683e-06, "loss": 1.7272, "step": 4312 }, { "epoch": 1.4700068166325835, "grad_norm": 23.232350282919203, "learning_rate": 6.042338584673371e-06, "loss": 2.3484, "step": 4313 }, { "epoch": 1.4703476482617588, "grad_norm": 19.635254522626155, "learning_rate": 6.040398994156049e-06, "loss": 2.1765, "step": 4314 }, { "epoch": 1.4706884798909339, "grad_norm": 13.076979793285888, "learning_rate": 6.038459239979826e-06, "loss": 2.3731, "step": 4315 }, { "epoch": 1.471029311520109, "grad_norm": 14.393060088279091, "learning_rate": 6.036519322449829e-06, "loss": 1.9468, "step": 4316 }, { "epoch": 1.4713701431492843, "grad_norm": 12.839167963294821, "learning_rate": 6.034579241871217e-06, "loss": 2.023, "step": 4317 }, { "epoch": 1.4717109747784596, "grad_norm": 17.56449554808446, "learning_rate": 6.032638998549172e-06, "loss": 2.484, "step": 4318 }, { "epoch": 1.4720518064076347, "grad_norm": 16.404348030226192, "learning_rate": 6.030698592788901e-06, "loss": 1.9406, "step": 4319 }, { "epoch": 1.4723926380368098, "grad_norm": 15.499986313430371, "learning_rate": 6.028758024895638e-06, "loss": 1.8789, "step": 4320 }, { "epoch": 1.472733469665985, "grad_norm": 37.41188864338542, "learning_rate": 6.026817295174643e-06, "loss": 2.3985, "step": 4321 }, { "epoch": 1.4730743012951601, "grad_norm": 20.44075541887155, "learning_rate": 6.0248764039312e-06, "loss": 2.281, "step": 4322 }, { "epoch": 1.4734151329243355, "grad_norm": 12.267937133202398, "learning_rate": 6.02293535147062e-06, "loss": 2.3547, "step": 4323 }, { "epoch": 1.4737559645535105, "grad_norm": 16.086559265614447, "learning_rate": 6.020994138098235e-06, "loss": 2.0084, "step": 4324 }, { "epoch": 1.4740967961826859, "grad_norm": 11.087196037558313, "learning_rate": 6.019052764119409e-06, "loss": 2.0872, "step": 4325 }, { "epoch": 1.474437627811861, "grad_norm": 17.846364859924826, "learning_rate": 6.017111229839528e-06, "loss": 2.3159, "step": 4326 }, { "epoch": 1.474778459441036, "grad_norm": 14.490141538655365, "learning_rate": 6.015169535563999e-06, "loss": 2.4355, "step": 4327 }, { "epoch": 1.4751192910702113, "grad_norm": 11.639946560469058, "learning_rate": 6.013227681598263e-06, "loss": 1.9089, "step": 4328 }, { "epoch": 1.4754601226993864, "grad_norm": 14.155393277264535, "learning_rate": 6.011285668247777e-06, "loss": 1.9716, "step": 4329 }, { "epoch": 1.4758009543285617, "grad_norm": 16.367862802599, "learning_rate": 6.009343495818031e-06, "loss": 2.4176, "step": 4330 }, { "epoch": 1.4761417859577368, "grad_norm": 16.09009333025595, "learning_rate": 6.007401164614535e-06, "loss": 2.3468, "step": 4331 }, { "epoch": 1.4764826175869121, "grad_norm": 15.633988974105367, "learning_rate": 6.005458674942826e-06, "loss": 2.5215, "step": 4332 }, { "epoch": 1.4768234492160872, "grad_norm": 16.624538682470327, "learning_rate": 6.003516027108465e-06, "loss": 2.5859, "step": 4333 }, { "epoch": 1.4771642808452625, "grad_norm": 22.964766823710335, "learning_rate": 6.001573221417037e-06, "loss": 2.1781, "step": 4334 }, { "epoch": 1.4775051124744376, "grad_norm": 14.01060032774743, "learning_rate": 5.999630258174156e-06, "loss": 1.9674, "step": 4335 }, { "epoch": 1.477845944103613, "grad_norm": 16.506220272520224, "learning_rate": 5.997687137685455e-06, "loss": 2.2624, "step": 4336 }, { "epoch": 1.478186775732788, "grad_norm": 14.404370658515393, "learning_rate": 5.995743860256595e-06, "loss": 2.0539, "step": 4337 }, { "epoch": 1.478527607361963, "grad_norm": 10.44174277671755, "learning_rate": 5.993800426193264e-06, "loss": 2.0565, "step": 4338 }, { "epoch": 1.4788684389911384, "grad_norm": 22.494032498142996, "learning_rate": 5.991856835801167e-06, "loss": 2.3975, "step": 4339 }, { "epoch": 1.4792092706203135, "grad_norm": 14.635242832998575, "learning_rate": 5.989913089386045e-06, "loss": 2.2482, "step": 4340 }, { "epoch": 1.4795501022494888, "grad_norm": 12.302199216684343, "learning_rate": 5.987969187253649e-06, "loss": 2.2157, "step": 4341 }, { "epoch": 1.4798909338786639, "grad_norm": 16.301296609549595, "learning_rate": 5.9860251297097695e-06, "loss": 2.3824, "step": 4342 }, { "epoch": 1.4802317655078392, "grad_norm": 21.84649136079427, "learning_rate": 5.984080917060213e-06, "loss": 2.3244, "step": 4343 }, { "epoch": 1.4805725971370143, "grad_norm": 26.51710485272529, "learning_rate": 5.98213654961081e-06, "loss": 2.2449, "step": 4344 }, { "epoch": 1.4809134287661894, "grad_norm": 14.56186016740109, "learning_rate": 5.980192027667416e-06, "loss": 2.6343, "step": 4345 }, { "epoch": 1.4812542603953647, "grad_norm": 17.193353823222914, "learning_rate": 5.978247351535917e-06, "loss": 2.3574, "step": 4346 }, { "epoch": 1.48159509202454, "grad_norm": 15.853976602905274, "learning_rate": 5.9763025215222144e-06, "loss": 2.4601, "step": 4347 }, { "epoch": 1.481935923653715, "grad_norm": 19.708376564723228, "learning_rate": 5.97435753793224e-06, "loss": 2.0711, "step": 4348 }, { "epoch": 1.4822767552828902, "grad_norm": 19.26244784265896, "learning_rate": 5.972412401071943e-06, "loss": 1.8038, "step": 4349 }, { "epoch": 1.4826175869120655, "grad_norm": 19.675499374713446, "learning_rate": 5.970467111247307e-06, "loss": 2.3348, "step": 4350 }, { "epoch": 1.4829584185412406, "grad_norm": 19.183839445836153, "learning_rate": 5.968521668764331e-06, "loss": 3.1425, "step": 4351 }, { "epoch": 1.4832992501704159, "grad_norm": 15.853785278498082, "learning_rate": 5.96657607392904e-06, "loss": 2.3034, "step": 4352 }, { "epoch": 1.483640081799591, "grad_norm": 12.174878318202902, "learning_rate": 5.964630327047485e-06, "loss": 1.9542, "step": 4353 }, { "epoch": 1.4839809134287663, "grad_norm": 24.327380907425574, "learning_rate": 5.96268442842574e-06, "loss": 2.6644, "step": 4354 }, { "epoch": 1.4843217450579413, "grad_norm": 17.68424270666283, "learning_rate": 5.9607383783699014e-06, "loss": 2.3073, "step": 4355 }, { "epoch": 1.4846625766871164, "grad_norm": 34.12590406465019, "learning_rate": 5.958792177186093e-06, "loss": 2.3565, "step": 4356 }, { "epoch": 1.4850034083162917, "grad_norm": 15.672661659068368, "learning_rate": 5.956845825180457e-06, "loss": 1.9451, "step": 4357 }, { "epoch": 1.485344239945467, "grad_norm": 19.245374370862002, "learning_rate": 5.954899322659163e-06, "loss": 1.821, "step": 4358 }, { "epoch": 1.4856850715746421, "grad_norm": 15.983421880457763, "learning_rate": 5.9529526699284054e-06, "loss": 2.3557, "step": 4359 }, { "epoch": 1.4860259032038172, "grad_norm": 20.991168062847308, "learning_rate": 5.951005867294401e-06, "loss": 2.4095, "step": 4360 }, { "epoch": 1.4863667348329925, "grad_norm": 20.864067691311067, "learning_rate": 5.949058915063386e-06, "loss": 2.6486, "step": 4361 }, { "epoch": 1.4867075664621676, "grad_norm": 14.669008420087733, "learning_rate": 5.947111813541625e-06, "loss": 2.3718, "step": 4362 }, { "epoch": 1.487048398091343, "grad_norm": 14.614261738980627, "learning_rate": 5.945164563035408e-06, "loss": 2.4114, "step": 4363 }, { "epoch": 1.487389229720518, "grad_norm": 17.210871030497227, "learning_rate": 5.943217163851041e-06, "loss": 2.5233, "step": 4364 }, { "epoch": 1.4877300613496933, "grad_norm": 16.214764175071146, "learning_rate": 5.94126961629486e-06, "loss": 1.2758, "step": 4365 }, { "epoch": 1.4880708929788684, "grad_norm": 16.01496250527842, "learning_rate": 5.93932192067322e-06, "loss": 1.6968, "step": 4366 }, { "epoch": 1.4884117246080435, "grad_norm": 17.22485266381545, "learning_rate": 5.937374077292503e-06, "loss": 2.4804, "step": 4367 }, { "epoch": 1.4887525562372188, "grad_norm": 19.867666478342336, "learning_rate": 5.9354260864591155e-06, "loss": 1.8169, "step": 4368 }, { "epoch": 1.4890933878663941, "grad_norm": 16.638604739152306, "learning_rate": 5.933477948479478e-06, "loss": 2.3097, "step": 4369 }, { "epoch": 1.4894342194955692, "grad_norm": 14.995261878171839, "learning_rate": 5.931529663660042e-06, "loss": 2.5542, "step": 4370 }, { "epoch": 1.4897750511247443, "grad_norm": 12.212560788723833, "learning_rate": 5.9295812323072845e-06, "loss": 2.1694, "step": 4371 }, { "epoch": 1.4901158827539196, "grad_norm": 21.273236827636545, "learning_rate": 5.927632654727696e-06, "loss": 2.0328, "step": 4372 }, { "epoch": 1.4904567143830947, "grad_norm": 29.05755149922163, "learning_rate": 5.9256839312278e-06, "loss": 1.3758, "step": 4373 }, { "epoch": 1.49079754601227, "grad_norm": 15.995402679235733, "learning_rate": 5.923735062114137e-06, "loss": 2.612, "step": 4374 }, { "epoch": 1.491138377641445, "grad_norm": 20.392348800079166, "learning_rate": 5.92178604769327e-06, "loss": 2.2273, "step": 4375 }, { "epoch": 1.4914792092706204, "grad_norm": 12.539207363167089, "learning_rate": 5.91983688827179e-06, "loss": 2.2712, "step": 4376 }, { "epoch": 1.4918200408997955, "grad_norm": 14.859699208246981, "learning_rate": 5.917887584156305e-06, "loss": 2.3026, "step": 4377 }, { "epoch": 1.4921608725289706, "grad_norm": 18.51362982762398, "learning_rate": 5.91593813565345e-06, "loss": 2.0117, "step": 4378 }, { "epoch": 1.4925017041581459, "grad_norm": 21.970984023736694, "learning_rate": 5.913988543069878e-06, "loss": 2.2254, "step": 4379 }, { "epoch": 1.4928425357873212, "grad_norm": 27.294980308504538, "learning_rate": 5.9120388067122715e-06, "loss": 2.2178, "step": 4380 }, { "epoch": 1.4931833674164963, "grad_norm": 18.42680947737345, "learning_rate": 5.91008892688733e-06, "loss": 2.8541, "step": 4381 }, { "epoch": 1.4935241990456714, "grad_norm": 19.22236118660401, "learning_rate": 5.908138903901776e-06, "loss": 2.2953, "step": 4382 }, { "epoch": 1.4938650306748467, "grad_norm": 17.854036861615693, "learning_rate": 5.9061887380623585e-06, "loss": 2.1056, "step": 4383 }, { "epoch": 1.4942058623040217, "grad_norm": 13.975789898285342, "learning_rate": 5.904238429675845e-06, "loss": 2.375, "step": 4384 }, { "epoch": 1.494546693933197, "grad_norm": 18.122727077258578, "learning_rate": 5.902287979049027e-06, "loss": 2.4284, "step": 4385 }, { "epoch": 1.4948875255623721, "grad_norm": 16.10644014774808, "learning_rate": 5.9003373864887185e-06, "loss": 1.8998, "step": 4386 }, { "epoch": 1.4952283571915475, "grad_norm": 15.610424663536032, "learning_rate": 5.898386652301754e-06, "loss": 2.267, "step": 4387 }, { "epoch": 1.4955691888207225, "grad_norm": 16.316840219254782, "learning_rate": 5.896435776794994e-06, "loss": 2.1361, "step": 4388 }, { "epoch": 1.4959100204498976, "grad_norm": 14.158062634043883, "learning_rate": 5.894484760275318e-06, "loss": 2.387, "step": 4389 }, { "epoch": 1.496250852079073, "grad_norm": 20.83430941653588, "learning_rate": 5.892533603049628e-06, "loss": 2.5654, "step": 4390 }, { "epoch": 1.4965916837082482, "grad_norm": 16.18184828947139, "learning_rate": 5.890582305424848e-06, "loss": 2.7041, "step": 4391 }, { "epoch": 1.4969325153374233, "grad_norm": 14.648440214088051, "learning_rate": 5.8886308677079265e-06, "loss": 1.5609, "step": 4392 }, { "epoch": 1.4972733469665984, "grad_norm": 22.247200744273147, "learning_rate": 5.886679290205832e-06, "loss": 2.7497, "step": 4393 }, { "epoch": 1.4976141785957737, "grad_norm": 14.874886673186973, "learning_rate": 5.884727573225558e-06, "loss": 2.2733, "step": 4394 }, { "epoch": 1.4979550102249488, "grad_norm": 18.928202482097987, "learning_rate": 5.8827757170741126e-06, "loss": 2.7269, "step": 4395 }, { "epoch": 1.4982958418541241, "grad_norm": 17.73729242999824, "learning_rate": 5.880823722058533e-06, "loss": 2.6446, "step": 4396 }, { "epoch": 1.4986366734832992, "grad_norm": 21.13625665848129, "learning_rate": 5.878871588485876e-06, "loss": 1.8042, "step": 4397 }, { "epoch": 1.4989775051124745, "grad_norm": 18.73452948178375, "learning_rate": 5.876919316663219e-06, "loss": 2.5779, "step": 4398 }, { "epoch": 1.4993183367416496, "grad_norm": 20.13286226542239, "learning_rate": 5.874966906897663e-06, "loss": 2.6525, "step": 4399 }, { "epoch": 1.4996591683708247, "grad_norm": 24.69860815147935, "learning_rate": 5.8730143594963296e-06, "loss": 2.2239, "step": 4400 }, { "epoch": 1.5, "grad_norm": 25.558244928201383, "learning_rate": 5.871061674766362e-06, "loss": 2.7978, "step": 4401 }, { "epoch": 1.5003408316291753, "grad_norm": 14.886890223038504, "learning_rate": 5.8691088530149265e-06, "loss": 2.2302, "step": 4402 }, { "epoch": 1.5006816632583504, "grad_norm": 19.412116256120086, "learning_rate": 5.867155894549208e-06, "loss": 1.939, "step": 4403 }, { "epoch": 1.5010224948875255, "grad_norm": 30.85734612653365, "learning_rate": 5.865202799676414e-06, "loss": 2.2362, "step": 4404 }, { "epoch": 1.5013633265167008, "grad_norm": 14.391035080093314, "learning_rate": 5.863249568703777e-06, "loss": 2.5675, "step": 4405 }, { "epoch": 1.501704158145876, "grad_norm": 15.091927908563555, "learning_rate": 5.861296201938547e-06, "loss": 2.529, "step": 4406 }, { "epoch": 1.502044989775051, "grad_norm": 12.909622535084232, "learning_rate": 5.859342699687993e-06, "loss": 2.3284, "step": 4407 }, { "epoch": 1.5023858214042263, "grad_norm": 18.254061869263904, "learning_rate": 5.857389062259414e-06, "loss": 2.459, "step": 4408 }, { "epoch": 1.5027266530334016, "grad_norm": 13.521037819599774, "learning_rate": 5.855435289960122e-06, "loss": 2.5122, "step": 4409 }, { "epoch": 1.5030674846625767, "grad_norm": 12.627369573483533, "learning_rate": 5.853481383097453e-06, "loss": 2.3909, "step": 4410 }, { "epoch": 1.5034083162917518, "grad_norm": 24.500293654417543, "learning_rate": 5.851527341978767e-06, "loss": 2.7882, "step": 4411 }, { "epoch": 1.503749147920927, "grad_norm": 14.593590357615406, "learning_rate": 5.849573166911437e-06, "loss": 2.2719, "step": 4412 }, { "epoch": 1.5040899795501024, "grad_norm": 16.741907622236518, "learning_rate": 5.847618858202868e-06, "loss": 2.2827, "step": 4413 }, { "epoch": 1.5044308111792775, "grad_norm": 41.12631586840671, "learning_rate": 5.845664416160478e-06, "loss": 2.8361, "step": 4414 }, { "epoch": 1.5047716428084525, "grad_norm": 10.02700999534083, "learning_rate": 5.843709841091708e-06, "loss": 2.1223, "step": 4415 }, { "epoch": 1.5051124744376279, "grad_norm": 12.124044832579232, "learning_rate": 5.8417551333040225e-06, "loss": 1.7476, "step": 4416 }, { "epoch": 1.5054533060668032, "grad_norm": 29.226746138162905, "learning_rate": 5.839800293104904e-06, "loss": 2.2365, "step": 4417 }, { "epoch": 1.505794137695978, "grad_norm": 18.21900625925857, "learning_rate": 5.837845320801855e-06, "loss": 1.6272, "step": 4418 }, { "epoch": 1.5061349693251533, "grad_norm": 36.45758020967617, "learning_rate": 5.835890216702403e-06, "loss": 2.6181, "step": 4419 }, { "epoch": 1.5064758009543286, "grad_norm": 14.937692253151669, "learning_rate": 5.8339349811140935e-06, "loss": 2.2365, "step": 4420 }, { "epoch": 1.5068166325835037, "grad_norm": 13.856883177491705, "learning_rate": 5.8319796143444885e-06, "loss": 2.1825, "step": 4421 }, { "epoch": 1.5071574642126788, "grad_norm": 18.66235581193284, "learning_rate": 5.8300241167011805e-06, "loss": 2.4865, "step": 4422 }, { "epoch": 1.5074982958418541, "grad_norm": 11.980513085267878, "learning_rate": 5.828068488491775e-06, "loss": 2.0243, "step": 4423 }, { "epoch": 1.5078391274710294, "grad_norm": 20.508395866238647, "learning_rate": 5.826112730023898e-06, "loss": 2.3111, "step": 4424 }, { "epoch": 1.5081799591002045, "grad_norm": 15.153418986583143, "learning_rate": 5.824156841605202e-06, "loss": 2.0755, "step": 4425 }, { "epoch": 1.5085207907293796, "grad_norm": 15.703733243239974, "learning_rate": 5.822200823543354e-06, "loss": 2.7779, "step": 4426 }, { "epoch": 1.508861622358555, "grad_norm": 14.251157260829073, "learning_rate": 5.820244676146042e-06, "loss": 2.0868, "step": 4427 }, { "epoch": 1.50920245398773, "grad_norm": 14.149203967149003, "learning_rate": 5.818288399720978e-06, "loss": 2.164, "step": 4428 }, { "epoch": 1.509543285616905, "grad_norm": 21.21821425587182, "learning_rate": 5.816331994575892e-06, "loss": 2.3317, "step": 4429 }, { "epoch": 1.5098841172460804, "grad_norm": 13.863420653658345, "learning_rate": 5.814375461018532e-06, "loss": 1.749, "step": 4430 }, { "epoch": 1.5102249488752557, "grad_norm": 21.93916276195974, "learning_rate": 5.812418799356672e-06, "loss": 2.3073, "step": 4431 }, { "epoch": 1.5105657805044308, "grad_norm": 20.39953958683771, "learning_rate": 5.810462009898099e-06, "loss": 2.9469, "step": 4432 }, { "epoch": 1.5109066121336059, "grad_norm": 18.24309487640441, "learning_rate": 5.808505092950627e-06, "loss": 2.4919, "step": 4433 }, { "epoch": 1.5112474437627812, "grad_norm": 15.680008778556585, "learning_rate": 5.8065480488220845e-06, "loss": 1.8797, "step": 4434 }, { "epoch": 1.5115882753919565, "grad_norm": 19.04827567637642, "learning_rate": 5.804590877820323e-06, "loss": 2.4124, "step": 4435 }, { "epoch": 1.5119291070211316, "grad_norm": 20.685789229847273, "learning_rate": 5.802633580253215e-06, "loss": 2.3996, "step": 4436 }, { "epoch": 1.5122699386503067, "grad_norm": 14.90527949181147, "learning_rate": 5.800676156428649e-06, "loss": 2.1288, "step": 4437 }, { "epoch": 1.512610770279482, "grad_norm": 20.944916764061936, "learning_rate": 5.798718606654535e-06, "loss": 2.9461, "step": 4438 }, { "epoch": 1.512951601908657, "grad_norm": 18.103054874099506, "learning_rate": 5.796760931238807e-06, "loss": 2.944, "step": 4439 }, { "epoch": 1.5132924335378322, "grad_norm": 23.60172217259511, "learning_rate": 5.7948031304894115e-06, "loss": 2.4022, "step": 4440 }, { "epoch": 1.5136332651670075, "grad_norm": 14.86106548121626, "learning_rate": 5.792845204714319e-06, "loss": 2.0864, "step": 4441 }, { "epoch": 1.5139740967961828, "grad_norm": 12.03073156570767, "learning_rate": 5.790887154221521e-06, "loss": 2.4186, "step": 4442 }, { "epoch": 1.5143149284253579, "grad_norm": 15.847178466827643, "learning_rate": 5.788928979319024e-06, "loss": 2.1119, "step": 4443 }, { "epoch": 1.514655760054533, "grad_norm": 18.95602963578717, "learning_rate": 5.7869706803148584e-06, "loss": 2.4698, "step": 4444 }, { "epoch": 1.5149965916837083, "grad_norm": 16.074294736576917, "learning_rate": 5.785012257517074e-06, "loss": 2.4274, "step": 4445 }, { "epoch": 1.5153374233128836, "grad_norm": 11.793597161533356, "learning_rate": 5.783053711233734e-06, "loss": 1.5285, "step": 4446 }, { "epoch": 1.5156782549420587, "grad_norm": 23.456431194985754, "learning_rate": 5.781095041772929e-06, "loss": 2.0796, "step": 4447 }, { "epoch": 1.5160190865712337, "grad_norm": 20.218628181745157, "learning_rate": 5.779136249442765e-06, "loss": 2.52, "step": 4448 }, { "epoch": 1.516359918200409, "grad_norm": 13.703867807308157, "learning_rate": 5.777177334551367e-06, "loss": 2.0232, "step": 4449 }, { "epoch": 1.5167007498295841, "grad_norm": 14.411004856013218, "learning_rate": 5.775218297406879e-06, "loss": 2.4637, "step": 4450 }, { "epoch": 1.5170415814587592, "grad_norm": 14.601776749000834, "learning_rate": 5.773259138317469e-06, "loss": 2.1378, "step": 4451 }, { "epoch": 1.5173824130879345, "grad_norm": 14.986078956973593, "learning_rate": 5.771299857591317e-06, "loss": 2.1091, "step": 4452 }, { "epoch": 1.5177232447171098, "grad_norm": 31.542911771964874, "learning_rate": 5.769340455536629e-06, "loss": 2.3165, "step": 4453 }, { "epoch": 1.518064076346285, "grad_norm": 20.42977572361904, "learning_rate": 5.7673809324616235e-06, "loss": 2.4137, "step": 4454 }, { "epoch": 1.51840490797546, "grad_norm": 14.559900918365898, "learning_rate": 5.765421288674542e-06, "loss": 1.7066, "step": 4455 }, { "epoch": 1.5187457396046353, "grad_norm": 14.772606808981097, "learning_rate": 5.763461524483645e-06, "loss": 1.9501, "step": 4456 }, { "epoch": 1.5190865712338106, "grad_norm": 23.03491619089363, "learning_rate": 5.761501640197212e-06, "loss": 2.4569, "step": 4457 }, { "epoch": 1.5194274028629857, "grad_norm": 16.848824042349236, "learning_rate": 5.759541636123539e-06, "loss": 1.6339, "step": 4458 }, { "epoch": 1.5197682344921608, "grad_norm": 18.124254777033162, "learning_rate": 5.757581512570943e-06, "loss": 2.5538, "step": 4459 }, { "epoch": 1.5201090661213361, "grad_norm": 17.856634659117717, "learning_rate": 5.755621269847758e-06, "loss": 2.2541, "step": 4460 }, { "epoch": 1.5204498977505112, "grad_norm": 13.849110966944009, "learning_rate": 5.753660908262342e-06, "loss": 2.3696, "step": 4461 }, { "epoch": 1.5207907293796863, "grad_norm": 13.584582524788413, "learning_rate": 5.751700428123065e-06, "loss": 2.6209, "step": 4462 }, { "epoch": 1.5211315610088616, "grad_norm": 19.112757412219054, "learning_rate": 5.749739829738316e-06, "loss": 2.2952, "step": 4463 }, { "epoch": 1.521472392638037, "grad_norm": 18.088438281388893, "learning_rate": 5.747779113416508e-06, "loss": 2.183, "step": 4464 }, { "epoch": 1.521813224267212, "grad_norm": 11.992419791784215, "learning_rate": 5.745818279466069e-06, "loss": 1.9801, "step": 4465 }, { "epoch": 1.522154055896387, "grad_norm": 18.736318127353996, "learning_rate": 5.743857328195446e-06, "loss": 2.3415, "step": 4466 }, { "epoch": 1.5224948875255624, "grad_norm": 21.830839218996527, "learning_rate": 5.741896259913103e-06, "loss": 2.2487, "step": 4467 }, { "epoch": 1.5228357191547377, "grad_norm": 18.333678034586118, "learning_rate": 5.739935074927526e-06, "loss": 2.6311, "step": 4468 }, { "epoch": 1.5231765507839128, "grad_norm": 20.283494017826694, "learning_rate": 5.737973773547216e-06, "loss": 2.303, "step": 4469 }, { "epoch": 1.5235173824130879, "grad_norm": 14.959179322809163, "learning_rate": 5.7360123560806944e-06, "loss": 2.2666, "step": 4470 }, { "epoch": 1.5238582140422632, "grad_norm": 25.75322318743068, "learning_rate": 5.734050822836499e-06, "loss": 2.2817, "step": 4471 }, { "epoch": 1.5241990456714383, "grad_norm": 15.714489051219761, "learning_rate": 5.732089174123185e-06, "loss": 2.0088, "step": 4472 }, { "epoch": 1.5245398773006134, "grad_norm": 20.199216484723046, "learning_rate": 5.730127410249333e-06, "loss": 2.7705, "step": 4473 }, { "epoch": 1.5248807089297887, "grad_norm": 15.680619179307195, "learning_rate": 5.728165531523531e-06, "loss": 2.5124, "step": 4474 }, { "epoch": 1.525221540558964, "grad_norm": 12.090547151637967, "learning_rate": 5.726203538254392e-06, "loss": 2.209, "step": 4475 }, { "epoch": 1.525562372188139, "grad_norm": 30.17350445026114, "learning_rate": 5.7242414307505465e-06, "loss": 2.1134, "step": 4476 }, { "epoch": 1.5259032038173141, "grad_norm": 16.987742033332104, "learning_rate": 5.722279209320641e-06, "loss": 2.3002, "step": 4477 }, { "epoch": 1.5262440354464895, "grad_norm": 31.554420522679724, "learning_rate": 5.72031687427334e-06, "loss": 2.105, "step": 4478 }, { "epoch": 1.5265848670756648, "grad_norm": 19.561010236949638, "learning_rate": 5.718354425917328e-06, "loss": 2.1272, "step": 4479 }, { "epoch": 1.5269256987048399, "grad_norm": 12.530186208826217, "learning_rate": 5.716391864561303e-06, "loss": 2.3438, "step": 4480 }, { "epoch": 1.527266530334015, "grad_norm": 16.035803145145216, "learning_rate": 5.714429190513988e-06, "loss": 2.6538, "step": 4481 }, { "epoch": 1.5276073619631902, "grad_norm": 22.89299402574627, "learning_rate": 5.712466404084117e-06, "loss": 2.4911, "step": 4482 }, { "epoch": 1.5279481935923653, "grad_norm": 16.503772399703138, "learning_rate": 5.710503505580445e-06, "loss": 2.7619, "step": 4483 }, { "epoch": 1.5282890252215404, "grad_norm": 15.326450524968614, "learning_rate": 5.708540495311742e-06, "loss": 2.0846, "step": 4484 }, { "epoch": 1.5286298568507157, "grad_norm": 16.319309335572246, "learning_rate": 5.706577373586799e-06, "loss": 2.3781, "step": 4485 }, { "epoch": 1.528970688479891, "grad_norm": 18.99701068828645, "learning_rate": 5.704614140714422e-06, "loss": 1.5151, "step": 4486 }, { "epoch": 1.5293115201090661, "grad_norm": 18.491769616070897, "learning_rate": 5.702650797003437e-06, "loss": 2.55, "step": 4487 }, { "epoch": 1.5296523517382412, "grad_norm": 18.316054523060785, "learning_rate": 5.700687342762684e-06, "loss": 2.3734, "step": 4488 }, { "epoch": 1.5299931833674165, "grad_norm": 24.661487887213248, "learning_rate": 5.698723778301021e-06, "loss": 2.1193, "step": 4489 }, { "epoch": 1.5303340149965918, "grad_norm": 16.256841104142268, "learning_rate": 5.696760103927329e-06, "loss": 2.3262, "step": 4490 }, { "epoch": 1.530674846625767, "grad_norm": 26.452010638481248, "learning_rate": 5.694796319950497e-06, "loss": 1.6503, "step": 4491 }, { "epoch": 1.531015678254942, "grad_norm": 22.348801509536344, "learning_rate": 5.692832426679439e-06, "loss": 2.2185, "step": 4492 }, { "epoch": 1.5313565098841173, "grad_norm": 19.338725045868447, "learning_rate": 5.690868424423082e-06, "loss": 2.8113, "step": 4493 }, { "epoch": 1.5316973415132924, "grad_norm": 11.846456876975783, "learning_rate": 5.688904313490372e-06, "loss": 2.2021, "step": 4494 }, { "epoch": 1.5320381731424675, "grad_norm": 16.395202551988312, "learning_rate": 5.686940094190272e-06, "loss": 1.9343, "step": 4495 }, { "epoch": 1.5323790047716428, "grad_norm": 15.877035115543848, "learning_rate": 5.6849757668317586e-06, "loss": 2.2147, "step": 4496 }, { "epoch": 1.532719836400818, "grad_norm": 20.97390855976674, "learning_rate": 5.68301133172383e-06, "loss": 1.6291, "step": 4497 }, { "epoch": 1.5330606680299932, "grad_norm": 18.647775445006904, "learning_rate": 5.681046789175502e-06, "loss": 2.2285, "step": 4498 }, { "epoch": 1.5334014996591683, "grad_norm": 16.943944498590685, "learning_rate": 5.679082139495803e-06, "loss": 2.3699, "step": 4499 }, { "epoch": 1.5337423312883436, "grad_norm": 17.522932187070786, "learning_rate": 5.677117382993779e-06, "loss": 2.6339, "step": 4500 }, { "epoch": 1.534083162917519, "grad_norm": 25.636283146035158, "learning_rate": 5.6751525199784956e-06, "loss": 2.3638, "step": 4501 }, { "epoch": 1.534423994546694, "grad_norm": 22.947926318488395, "learning_rate": 5.673187550759033e-06, "loss": 2.1207, "step": 4502 }, { "epoch": 1.534764826175869, "grad_norm": 16.298488879302703, "learning_rate": 5.671222475644489e-06, "loss": 2.4745, "step": 4503 }, { "epoch": 1.5351056578050444, "grad_norm": 16.892881503603896, "learning_rate": 5.669257294943978e-06, "loss": 2.5625, "step": 4504 }, { "epoch": 1.5354464894342195, "grad_norm": 18.810721396712914, "learning_rate": 5.6672920089666295e-06, "loss": 2.7655, "step": 4505 }, { "epoch": 1.5357873210633946, "grad_norm": 16.23708019399656, "learning_rate": 5.665326618021591e-06, "loss": 2.2891, "step": 4506 }, { "epoch": 1.5361281526925699, "grad_norm": 20.71895409802873, "learning_rate": 5.6633611224180295e-06, "loss": 2.6315, "step": 4507 }, { "epoch": 1.5364689843217452, "grad_norm": 11.060093325691811, "learning_rate": 5.66139552246512e-06, "loss": 2.0289, "step": 4508 }, { "epoch": 1.5368098159509203, "grad_norm": 26.31080175708086, "learning_rate": 5.659429818472064e-06, "loss": 2.2056, "step": 4509 }, { "epoch": 1.5371506475800953, "grad_norm": 17.966651799965245, "learning_rate": 5.6574640107480725e-06, "loss": 3.057, "step": 4510 }, { "epoch": 1.5374914792092707, "grad_norm": 10.471145168779648, "learning_rate": 5.655498099602374e-06, "loss": 2.0043, "step": 4511 }, { "epoch": 1.537832310838446, "grad_norm": 13.880479499215832, "learning_rate": 5.653532085344215e-06, "loss": 2.4699, "step": 4512 }, { "epoch": 1.538173142467621, "grad_norm": 20.74585367647219, "learning_rate": 5.651565968282859e-06, "loss": 2.3074, "step": 4513 }, { "epoch": 1.5385139740967961, "grad_norm": 18.36012233788483, "learning_rate": 5.649599748727579e-06, "loss": 2.1297, "step": 4514 }, { "epoch": 1.5388548057259714, "grad_norm": 38.80231104375928, "learning_rate": 5.647633426987677e-06, "loss": 2.6303, "step": 4515 }, { "epoch": 1.5391956373551465, "grad_norm": 13.863756370189114, "learning_rate": 5.6456670033724555e-06, "loss": 1.8361, "step": 4516 }, { "epoch": 1.5395364689843216, "grad_norm": 19.17303323161461, "learning_rate": 5.643700478191245e-06, "loss": 1.7585, "step": 4517 }, { "epoch": 1.539877300613497, "grad_norm": 17.723271151182608, "learning_rate": 5.641733851753387e-06, "loss": 2.7757, "step": 4518 }, { "epoch": 1.5402181322426722, "grad_norm": 17.37096971489388, "learning_rate": 5.6397671243682395e-06, "loss": 2.5183, "step": 4519 }, { "epoch": 1.5405589638718473, "grad_norm": 17.128054717352043, "learning_rate": 5.637800296345178e-06, "loss": 1.7092, "step": 4520 }, { "epoch": 1.5408997955010224, "grad_norm": 12.594532367964533, "learning_rate": 5.63583336799359e-06, "loss": 1.7114, "step": 4521 }, { "epoch": 1.5412406271301977, "grad_norm": 38.01902789877474, "learning_rate": 5.63386633962288e-06, "loss": 1.62, "step": 4522 }, { "epoch": 1.541581458759373, "grad_norm": 15.085500985553569, "learning_rate": 5.631899211542473e-06, "loss": 2.8388, "step": 4523 }, { "epoch": 1.5419222903885481, "grad_norm": 22.339557268709473, "learning_rate": 5.629931984061806e-06, "loss": 2.2281, "step": 4524 }, { "epoch": 1.5422631220177232, "grad_norm": 15.71327914815617, "learning_rate": 5.627964657490329e-06, "loss": 2.2207, "step": 4525 }, { "epoch": 1.5426039536468985, "grad_norm": 15.207207161489809, "learning_rate": 5.625997232137511e-06, "loss": 2.4799, "step": 4526 }, { "epoch": 1.5429447852760736, "grad_norm": 13.905384900383336, "learning_rate": 5.624029708312837e-06, "loss": 2.3252, "step": 4527 }, { "epoch": 1.5432856169052487, "grad_norm": 15.207839827584456, "learning_rate": 5.622062086325806e-06, "loss": 2.3561, "step": 4528 }, { "epoch": 1.543626448534424, "grad_norm": 22.383305603381622, "learning_rate": 5.620094366485933e-06, "loss": 2.1148, "step": 4529 }, { "epoch": 1.5439672801635993, "grad_norm": 18.063635031186525, "learning_rate": 5.618126549102747e-06, "loss": 2.5071, "step": 4530 }, { "epoch": 1.5443081117927744, "grad_norm": 17.36417222482685, "learning_rate": 5.616158634485793e-06, "loss": 1.9633, "step": 4531 }, { "epoch": 1.5446489434219495, "grad_norm": 15.368398933692372, "learning_rate": 5.614190622944636e-06, "loss": 2.2394, "step": 4532 }, { "epoch": 1.5449897750511248, "grad_norm": 20.66995014101257, "learning_rate": 5.612222514788847e-06, "loss": 2.1755, "step": 4533 }, { "epoch": 1.5453306066803, "grad_norm": 13.883159489876338, "learning_rate": 5.61025431032802e-06, "loss": 1.8324, "step": 4534 }, { "epoch": 1.545671438309475, "grad_norm": 17.069285077286047, "learning_rate": 5.608286009871761e-06, "loss": 2.1934, "step": 4535 }, { "epoch": 1.5460122699386503, "grad_norm": 20.161207577776928, "learning_rate": 5.6063176137296895e-06, "loss": 2.2412, "step": 4536 }, { "epoch": 1.5463531015678256, "grad_norm": 14.954737516201321, "learning_rate": 5.604349122211446e-06, "loss": 1.7689, "step": 4537 }, { "epoch": 1.5466939331970007, "grad_norm": 12.859945054608637, "learning_rate": 5.602380535626679e-06, "loss": 2.607, "step": 4538 }, { "epoch": 1.5470347648261757, "grad_norm": 17.59051769094524, "learning_rate": 5.6004118542850535e-06, "loss": 1.8486, "step": 4539 }, { "epoch": 1.547375596455351, "grad_norm": 16.160519540205215, "learning_rate": 5.598443078496255e-06, "loss": 2.4315, "step": 4540 }, { "epoch": 1.5477164280845264, "grad_norm": 14.925796151242583, "learning_rate": 5.59647420856998e-06, "loss": 2.2024, "step": 4541 }, { "epoch": 1.5480572597137015, "grad_norm": 18.899566168539746, "learning_rate": 5.594505244815935e-06, "loss": 2.3097, "step": 4542 }, { "epoch": 1.5483980913428765, "grad_norm": 19.554760601109123, "learning_rate": 5.592536187543849e-06, "loss": 2.2554, "step": 4543 }, { "epoch": 1.5487389229720518, "grad_norm": 13.591426221566362, "learning_rate": 5.590567037063462e-06, "loss": 2.1538, "step": 4544 }, { "epoch": 1.5490797546012272, "grad_norm": 13.829188094516626, "learning_rate": 5.588597793684529e-06, "loss": 2.1956, "step": 4545 }, { "epoch": 1.549420586230402, "grad_norm": 17.095589028937535, "learning_rate": 5.586628457716821e-06, "loss": 2.5356, "step": 4546 }, { "epoch": 1.5497614178595773, "grad_norm": 12.634154219209861, "learning_rate": 5.584659029470121e-06, "loss": 1.8715, "step": 4547 }, { "epoch": 1.5501022494887526, "grad_norm": 14.973753590959701, "learning_rate": 5.582689509254227e-06, "loss": 2.2156, "step": 4548 }, { "epoch": 1.5504430811179277, "grad_norm": 17.98834271483902, "learning_rate": 5.580719897378955e-06, "loss": 2.2146, "step": 4549 }, { "epoch": 1.5507839127471028, "grad_norm": 19.35957377640842, "learning_rate": 5.5787501941541314e-06, "loss": 2.0851, "step": 4550 }, { "epoch": 1.5511247443762781, "grad_norm": 19.96868296802965, "learning_rate": 5.5767803998895975e-06, "loss": 2.2074, "step": 4551 }, { "epoch": 1.5514655760054534, "grad_norm": 20.663593731404152, "learning_rate": 5.5748105148952115e-06, "loss": 2.3345, "step": 4552 }, { "epoch": 1.5518064076346285, "grad_norm": 16.690297506648037, "learning_rate": 5.572840539480843e-06, "loss": 2.1432, "step": 4553 }, { "epoch": 1.5521472392638036, "grad_norm": 14.826384953917485, "learning_rate": 5.570870473956377e-06, "loss": 2.2964, "step": 4554 }, { "epoch": 1.552488070892979, "grad_norm": 14.071220323692726, "learning_rate": 5.568900318631714e-06, "loss": 2.0338, "step": 4555 }, { "epoch": 1.5528289025221542, "grad_norm": 26.697938344253778, "learning_rate": 5.566930073816764e-06, "loss": 2.4725, "step": 4556 }, { "epoch": 1.553169734151329, "grad_norm": 17.798551277004922, "learning_rate": 5.5649597398214575e-06, "loss": 2.6123, "step": 4557 }, { "epoch": 1.5535105657805044, "grad_norm": 12.287513049906446, "learning_rate": 5.562989316955737e-06, "loss": 2.5775, "step": 4558 }, { "epoch": 1.5538513974096797, "grad_norm": 19.007803119440304, "learning_rate": 5.561018805529553e-06, "loss": 2.0729, "step": 4559 }, { "epoch": 1.5541922290388548, "grad_norm": 13.006272864799202, "learning_rate": 5.559048205852879e-06, "loss": 2.4404, "step": 4560 }, { "epoch": 1.5545330606680299, "grad_norm": 14.586298957211246, "learning_rate": 5.557077518235698e-06, "loss": 1.6723, "step": 4561 }, { "epoch": 1.5548738922972052, "grad_norm": 13.36579397886948, "learning_rate": 5.5551067429880034e-06, "loss": 2.175, "step": 4562 }, { "epoch": 1.5552147239263805, "grad_norm": 20.743318942079622, "learning_rate": 5.55313588041981e-06, "loss": 2.5129, "step": 4563 }, { "epoch": 1.5555555555555556, "grad_norm": 20.264671916343385, "learning_rate": 5.551164930841138e-06, "loss": 2.3455, "step": 4564 }, { "epoch": 1.5558963871847307, "grad_norm": 20.13533189770474, "learning_rate": 5.549193894562032e-06, "loss": 2.3851, "step": 4565 }, { "epoch": 1.556237218813906, "grad_norm": 14.090510022935085, "learning_rate": 5.547222771892539e-06, "loss": 2.1433, "step": 4566 }, { "epoch": 1.556578050443081, "grad_norm": 14.82879322602214, "learning_rate": 5.545251563142725e-06, "loss": 2.8661, "step": 4567 }, { "epoch": 1.5569188820722561, "grad_norm": 26.995243757317944, "learning_rate": 5.543280268622669e-06, "loss": 2.3495, "step": 4568 }, { "epoch": 1.5572597137014315, "grad_norm": 19.255254756246035, "learning_rate": 5.541308888642465e-06, "loss": 2.1623, "step": 4569 }, { "epoch": 1.5576005453306068, "grad_norm": 16.961854471561328, "learning_rate": 5.539337423512217e-06, "loss": 2.4926, "step": 4570 }, { "epoch": 1.5579413769597819, "grad_norm": 14.01331064459722, "learning_rate": 5.537365873542047e-06, "loss": 2.0223, "step": 4571 }, { "epoch": 1.558282208588957, "grad_norm": 12.123596527398922, "learning_rate": 5.535394239042086e-06, "loss": 2.3812, "step": 4572 }, { "epoch": 1.5586230402181322, "grad_norm": 14.752387093153903, "learning_rate": 5.533422520322478e-06, "loss": 2.2265, "step": 4573 }, { "epoch": 1.5589638718473076, "grad_norm": 14.940809183515194, "learning_rate": 5.5314507176933856e-06, "loss": 2.4791, "step": 4574 }, { "epoch": 1.5593047034764826, "grad_norm": 19.28870371571758, "learning_rate": 5.52947883146498e-06, "loss": 2.6672, "step": 4575 }, { "epoch": 1.5596455351056577, "grad_norm": 20.435038679828807, "learning_rate": 5.527506861947445e-06, "loss": 2.2547, "step": 4576 }, { "epoch": 1.559986366734833, "grad_norm": 16.507803246680247, "learning_rate": 5.525534809450981e-06, "loss": 2.3596, "step": 4577 }, { "epoch": 1.5603271983640081, "grad_norm": 18.418613911185275, "learning_rate": 5.5235626742858e-06, "loss": 1.9977, "step": 4578 }, { "epoch": 1.5606680299931832, "grad_norm": 15.44972011545337, "learning_rate": 5.521590456762126e-06, "loss": 2.7591, "step": 4579 }, { "epoch": 1.5610088616223585, "grad_norm": 15.814121777675343, "learning_rate": 5.519618157190197e-06, "loss": 2.2601, "step": 4580 }, { "epoch": 1.5613496932515338, "grad_norm": 12.295472619648839, "learning_rate": 5.517645775880263e-06, "loss": 1.8493, "step": 4581 }, { "epoch": 1.561690524880709, "grad_norm": 14.770501002788462, "learning_rate": 5.5156733131425875e-06, "loss": 1.5913, "step": 4582 }, { "epoch": 1.562031356509884, "grad_norm": 14.511594336066459, "learning_rate": 5.513700769287449e-06, "loss": 2.335, "step": 4583 }, { "epoch": 1.5623721881390593, "grad_norm": 21.453927189682354, "learning_rate": 5.511728144625133e-06, "loss": 1.9614, "step": 4584 }, { "epoch": 1.5627130197682346, "grad_norm": 21.877374074891087, "learning_rate": 5.509755439465943e-06, "loss": 2.1801, "step": 4585 }, { "epoch": 1.5630538513974097, "grad_norm": 14.702030830553168, "learning_rate": 5.507782654120193e-06, "loss": 2.2213, "step": 4586 }, { "epoch": 1.5633946830265848, "grad_norm": 14.000137805129189, "learning_rate": 5.50580978889821e-06, "loss": 2.6444, "step": 4587 }, { "epoch": 1.56373551465576, "grad_norm": 22.51301905484996, "learning_rate": 5.503836844110334e-06, "loss": 2.0408, "step": 4588 }, { "epoch": 1.5640763462849352, "grad_norm": 12.647322205959572, "learning_rate": 5.501863820066918e-06, "loss": 2.0935, "step": 4589 }, { "epoch": 1.5644171779141103, "grad_norm": 16.00808512692911, "learning_rate": 5.499890717078322e-06, "loss": 2.3375, "step": 4590 }, { "epoch": 1.5647580095432856, "grad_norm": 16.040408127764202, "learning_rate": 5.49791753545493e-06, "loss": 2.6628, "step": 4591 }, { "epoch": 1.565098841172461, "grad_norm": 18.37728286925699, "learning_rate": 5.495944275507125e-06, "loss": 2.3967, "step": 4592 }, { "epoch": 1.565439672801636, "grad_norm": 16.39960534739319, "learning_rate": 5.493970937545313e-06, "loss": 1.7704, "step": 4593 }, { "epoch": 1.565780504430811, "grad_norm": 26.325334377958335, "learning_rate": 5.4919975218799045e-06, "loss": 2.2746, "step": 4594 }, { "epoch": 1.5661213360599864, "grad_norm": 20.420164168115935, "learning_rate": 5.490024028821328e-06, "loss": 2.6453, "step": 4595 }, { "epoch": 1.5664621676891617, "grad_norm": 14.143236931366648, "learning_rate": 5.488050458680021e-06, "loss": 1.6972, "step": 4596 }, { "epoch": 1.5668029993183368, "grad_norm": 17.89774372202911, "learning_rate": 5.486076811766436e-06, "loss": 2.3785, "step": 4597 }, { "epoch": 1.5671438309475119, "grad_norm": 19.283277481883054, "learning_rate": 5.484103088391029e-06, "loss": 2.1286, "step": 4598 }, { "epoch": 1.5674846625766872, "grad_norm": 25.421014864645883, "learning_rate": 5.482129288864283e-06, "loss": 1.8287, "step": 4599 }, { "epoch": 1.5678254942058623, "grad_norm": 14.646514939845268, "learning_rate": 5.480155413496679e-06, "loss": 2.3781, "step": 4600 }, { "epoch": 1.5681663258350373, "grad_norm": 13.98732859142388, "learning_rate": 5.478181462598719e-06, "loss": 2.2064, "step": 4601 }, { "epoch": 1.5685071574642127, "grad_norm": 32.41951813025049, "learning_rate": 5.476207436480909e-06, "loss": 2.4675, "step": 4602 }, { "epoch": 1.568847989093388, "grad_norm": 21.941174274870537, "learning_rate": 5.4742333354537745e-06, "loss": 2.6162, "step": 4603 }, { "epoch": 1.569188820722563, "grad_norm": 11.793073749845188, "learning_rate": 5.472259159827848e-06, "loss": 1.8835, "step": 4604 }, { "epoch": 1.5695296523517381, "grad_norm": 17.346573750732396, "learning_rate": 5.470284909913677e-06, "loss": 2.4639, "step": 4605 }, { "epoch": 1.5698704839809134, "grad_norm": 17.42582630901005, "learning_rate": 5.468310586021817e-06, "loss": 3.0802, "step": 4606 }, { "epoch": 1.5702113156100888, "grad_norm": 15.401741676969037, "learning_rate": 5.466336188462837e-06, "loss": 1.8691, "step": 4607 }, { "epoch": 1.5705521472392638, "grad_norm": 11.690478941968317, "learning_rate": 5.4643617175473185e-06, "loss": 2.6479, "step": 4608 }, { "epoch": 1.570892978868439, "grad_norm": 14.777477674091458, "learning_rate": 5.462387173585853e-06, "loss": 2.5138, "step": 4609 }, { "epoch": 1.5712338104976142, "grad_norm": 27.373978050325842, "learning_rate": 5.4604125568890454e-06, "loss": 2.1911, "step": 4610 }, { "epoch": 1.5715746421267893, "grad_norm": 16.599439860731973, "learning_rate": 5.458437867767509e-06, "loss": 1.7195, "step": 4611 }, { "epoch": 1.5719154737559644, "grad_norm": 36.449446557991905, "learning_rate": 5.456463106531871e-06, "loss": 2.358, "step": 4612 }, { "epoch": 1.5722563053851397, "grad_norm": 19.085110698107933, "learning_rate": 5.454488273492769e-06, "loss": 2.5286, "step": 4613 }, { "epoch": 1.572597137014315, "grad_norm": 18.224970945778153, "learning_rate": 5.452513368960855e-06, "loss": 2.4288, "step": 4614 }, { "epoch": 1.5729379686434901, "grad_norm": 16.048249133512822, "learning_rate": 5.450538393246783e-06, "loss": 2.6852, "step": 4615 }, { "epoch": 1.5732788002726652, "grad_norm": 14.636668346392778, "learning_rate": 5.448563346661228e-06, "loss": 1.8733, "step": 4616 }, { "epoch": 1.5736196319018405, "grad_norm": 15.540391698782331, "learning_rate": 5.446588229514877e-06, "loss": 2.1636, "step": 4617 }, { "epoch": 1.5739604635310158, "grad_norm": 16.037902516657606, "learning_rate": 5.444613042118418e-06, "loss": 2.3338, "step": 4618 }, { "epoch": 1.574301295160191, "grad_norm": 20.03005865690343, "learning_rate": 5.442637784782556e-06, "loss": 2.5549, "step": 4619 }, { "epoch": 1.574642126789366, "grad_norm": 22.534853996436805, "learning_rate": 5.44066245781801e-06, "loss": 1.5909, "step": 4620 }, { "epoch": 1.5749829584185413, "grad_norm": 14.190853960766699, "learning_rate": 5.438687061535503e-06, "loss": 2.2622, "step": 4621 }, { "epoch": 1.5753237900477164, "grad_norm": 22.014241252830185, "learning_rate": 5.436711596245777e-06, "loss": 2.1066, "step": 4622 }, { "epoch": 1.5756646216768915, "grad_norm": 14.455333023933067, "learning_rate": 5.434736062259579e-06, "loss": 2.4928, "step": 4623 }, { "epoch": 1.5760054533060668, "grad_norm": 17.76242840541652, "learning_rate": 5.432760459887666e-06, "loss": 2.1982, "step": 4624 }, { "epoch": 1.576346284935242, "grad_norm": 33.272882083104676, "learning_rate": 5.4307847894408116e-06, "loss": 1.9724, "step": 4625 }, { "epoch": 1.5766871165644172, "grad_norm": 15.24615022442608, "learning_rate": 5.4288090512297945e-06, "loss": 2.5744, "step": 4626 }, { "epoch": 1.5770279481935923, "grad_norm": 22.122656958167198, "learning_rate": 5.426833245565406e-06, "loss": 2.3038, "step": 4627 }, { "epoch": 1.5773687798227676, "grad_norm": 14.14748460551022, "learning_rate": 5.42485737275845e-06, "loss": 2.3185, "step": 4628 }, { "epoch": 1.5777096114519429, "grad_norm": 14.687321056910124, "learning_rate": 5.422881433119737e-06, "loss": 2.5036, "step": 4629 }, { "epoch": 1.578050443081118, "grad_norm": 31.663759619728303, "learning_rate": 5.42090542696009e-06, "loss": 2.3224, "step": 4630 }, { "epoch": 1.578391274710293, "grad_norm": 21.960932119217066, "learning_rate": 5.418929354590347e-06, "loss": 2.1939, "step": 4631 }, { "epoch": 1.5787321063394684, "grad_norm": 20.72785634735985, "learning_rate": 5.416953216321345e-06, "loss": 2.6083, "step": 4632 }, { "epoch": 1.5790729379686435, "grad_norm": 31.731885029212318, "learning_rate": 5.414977012463945e-06, "loss": 2.1992, "step": 4633 }, { "epoch": 1.5794137695978185, "grad_norm": 12.276380973376998, "learning_rate": 5.413000743329009e-06, "loss": 1.9109, "step": 4634 }, { "epoch": 1.5797546012269938, "grad_norm": 20.470569860085885, "learning_rate": 5.411024409227411e-06, "loss": 2.52, "step": 4635 }, { "epoch": 1.5800954328561692, "grad_norm": 20.962358531567894, "learning_rate": 5.409048010470039e-06, "loss": 2.204, "step": 4636 }, { "epoch": 1.5804362644853442, "grad_norm": 22.81665795611026, "learning_rate": 5.407071547367784e-06, "loss": 2.3528, "step": 4637 }, { "epoch": 1.5807770961145193, "grad_norm": 12.939003824448685, "learning_rate": 5.405095020231555e-06, "loss": 2.1259, "step": 4638 }, { "epoch": 1.5811179277436946, "grad_norm": 29.888831792634384, "learning_rate": 5.403118429372267e-06, "loss": 2.421, "step": 4639 }, { "epoch": 1.58145875937287, "grad_norm": 15.632711955291112, "learning_rate": 5.4011417751008455e-06, "loss": 2.2353, "step": 4640 }, { "epoch": 1.581799591002045, "grad_norm": 17.4122137130586, "learning_rate": 5.399165057728226e-06, "loss": 2.4716, "step": 4641 }, { "epoch": 1.5821404226312201, "grad_norm": 19.174409827525633, "learning_rate": 5.397188277565355e-06, "loss": 2.6457, "step": 4642 }, { "epoch": 1.5824812542603954, "grad_norm": 22.00045746583584, "learning_rate": 5.395211434923186e-06, "loss": 1.8662, "step": 4643 }, { "epoch": 1.5828220858895705, "grad_norm": 18.913033993964564, "learning_rate": 5.393234530112684e-06, "loss": 2.5742, "step": 4644 }, { "epoch": 1.5831629175187456, "grad_norm": 17.012479972847277, "learning_rate": 5.391257563444825e-06, "loss": 2.387, "step": 4645 }, { "epoch": 1.583503749147921, "grad_norm": 33.54248938150719, "learning_rate": 5.389280535230594e-06, "loss": 2.368, "step": 4646 }, { "epoch": 1.5838445807770962, "grad_norm": 16.515161711463605, "learning_rate": 5.387303445780986e-06, "loss": 2.2156, "step": 4647 }, { "epoch": 1.5841854124062713, "grad_norm": 26.625199851611374, "learning_rate": 5.385326295407005e-06, "loss": 2.7548, "step": 4648 }, { "epoch": 1.5845262440354464, "grad_norm": 14.752083247463844, "learning_rate": 5.383349084419661e-06, "loss": 2.8358, "step": 4649 }, { "epoch": 1.5848670756646217, "grad_norm": 23.351771785959752, "learning_rate": 5.381371813129982e-06, "loss": 1.723, "step": 4650 }, { "epoch": 1.585207907293797, "grad_norm": 16.545803286328656, "learning_rate": 5.379394481848999e-06, "loss": 2.2993, "step": 4651 }, { "epoch": 1.585548738922972, "grad_norm": 20.851525567762618, "learning_rate": 5.3774170908877535e-06, "loss": 2.6296, "step": 4652 }, { "epoch": 1.5858895705521472, "grad_norm": 20.6575013287245, "learning_rate": 5.375439640557295e-06, "loss": 2.0242, "step": 4653 }, { "epoch": 1.5862304021813225, "grad_norm": 19.784085820823893, "learning_rate": 5.373462131168689e-06, "loss": 1.8094, "step": 4654 }, { "epoch": 1.5865712338104976, "grad_norm": 34.98160824540084, "learning_rate": 5.371484563033002e-06, "loss": 3.028, "step": 4655 }, { "epoch": 1.5869120654396727, "grad_norm": 13.747895588667932, "learning_rate": 5.369506936461316e-06, "loss": 2.2318, "step": 4656 }, { "epoch": 1.587252897068848, "grad_norm": 28.88492809379754, "learning_rate": 5.367529251764716e-06, "loss": 2.1059, "step": 4657 }, { "epoch": 1.5875937286980233, "grad_norm": 20.939272519636326, "learning_rate": 5.365551509254302e-06, "loss": 2.8905, "step": 4658 }, { "epoch": 1.5879345603271984, "grad_norm": 19.68541811777297, "learning_rate": 5.3635737092411825e-06, "loss": 2.6345, "step": 4659 }, { "epoch": 1.5882753919563735, "grad_norm": 14.97019151365539, "learning_rate": 5.361595852036468e-06, "loss": 2.1119, "step": 4660 }, { "epoch": 1.5886162235855488, "grad_norm": 24.36875551307099, "learning_rate": 5.359617937951289e-06, "loss": 2.3617, "step": 4661 }, { "epoch": 1.588957055214724, "grad_norm": 17.26557903885738, "learning_rate": 5.357639967296776e-06, "loss": 1.9659, "step": 4662 }, { "epoch": 1.5892978868438992, "grad_norm": 26.069391860040454, "learning_rate": 5.355661940384072e-06, "loss": 1.9836, "step": 4663 }, { "epoch": 1.5896387184730743, "grad_norm": 12.805942372836391, "learning_rate": 5.353683857524329e-06, "loss": 2.2034, "step": 4664 }, { "epoch": 1.5899795501022496, "grad_norm": 14.647399733685965, "learning_rate": 5.351705719028708e-06, "loss": 1.9891, "step": 4665 }, { "epoch": 1.5903203817314246, "grad_norm": 14.79863326128338, "learning_rate": 5.349727525208377e-06, "loss": 2.9953, "step": 4666 }, { "epoch": 1.5906612133605997, "grad_norm": 17.321718464026656, "learning_rate": 5.347749276374513e-06, "loss": 2.4116, "step": 4667 }, { "epoch": 1.591002044989775, "grad_norm": 17.237722513848905, "learning_rate": 5.345770972838306e-06, "loss": 2.251, "step": 4668 }, { "epoch": 1.5913428766189504, "grad_norm": 14.746371598225174, "learning_rate": 5.343792614910947e-06, "loss": 2.0302, "step": 4669 }, { "epoch": 1.5916837082481254, "grad_norm": 11.27656665031137, "learning_rate": 5.34181420290364e-06, "loss": 1.6276, "step": 4670 }, { "epoch": 1.5920245398773005, "grad_norm": 15.013949050076866, "learning_rate": 5.3398357371276e-06, "loss": 2.7505, "step": 4671 }, { "epoch": 1.5923653715064758, "grad_norm": 32.66609629689877, "learning_rate": 5.337857217894046e-06, "loss": 2.338, "step": 4672 }, { "epoch": 1.5927062031356511, "grad_norm": 15.020324791586102, "learning_rate": 5.335878645514206e-06, "loss": 2.6731, "step": 4673 }, { "epoch": 1.5930470347648262, "grad_norm": 19.34260333220306, "learning_rate": 5.333900020299316e-06, "loss": 2.5834, "step": 4674 }, { "epoch": 1.5933878663940013, "grad_norm": 12.654032536471469, "learning_rate": 5.331921342560627e-06, "loss": 2.0597, "step": 4675 }, { "epoch": 1.5937286980231766, "grad_norm": 12.134939730321133, "learning_rate": 5.329942612609388e-06, "loss": 1.7519, "step": 4676 }, { "epoch": 1.5940695296523517, "grad_norm": 20.158133418284404, "learning_rate": 5.327963830756863e-06, "loss": 1.9644, "step": 4677 }, { "epoch": 1.5944103612815268, "grad_norm": 20.96096656109158, "learning_rate": 5.32598499731432e-06, "loss": 2.0334, "step": 4678 }, { "epoch": 1.594751192910702, "grad_norm": 18.42856920966636, "learning_rate": 5.324006112593041e-06, "loss": 2.3241, "step": 4679 }, { "epoch": 1.5950920245398774, "grad_norm": 14.498728384850468, "learning_rate": 5.3220271769043105e-06, "loss": 2.239, "step": 4680 }, { "epoch": 1.5954328561690525, "grad_norm": 18.04582973732253, "learning_rate": 5.320048190559423e-06, "loss": 2.1997, "step": 4681 }, { "epoch": 1.5957736877982276, "grad_norm": 12.940081024612457, "learning_rate": 5.318069153869682e-06, "loss": 2.0214, "step": 4682 }, { "epoch": 1.596114519427403, "grad_norm": 18.342847186490165, "learning_rate": 5.3160900671463955e-06, "loss": 1.6898, "step": 4683 }, { "epoch": 1.5964553510565782, "grad_norm": 18.918886888436464, "learning_rate": 5.3141109307008835e-06, "loss": 2.7664, "step": 4684 }, { "epoch": 1.596796182685753, "grad_norm": 19.993758593123385, "learning_rate": 5.3121317448444735e-06, "loss": 1.767, "step": 4685 }, { "epoch": 1.5971370143149284, "grad_norm": 15.957796262172817, "learning_rate": 5.310152509888495e-06, "loss": 2.2264, "step": 4686 }, { "epoch": 1.5974778459441037, "grad_norm": 20.088795351504174, "learning_rate": 5.3081732261442916e-06, "loss": 2.0241, "step": 4687 }, { "epoch": 1.5978186775732788, "grad_norm": 19.615194506479, "learning_rate": 5.306193893923216e-06, "loss": 3.0074, "step": 4688 }, { "epoch": 1.5981595092024539, "grad_norm": 15.600081064851654, "learning_rate": 5.3042145135366195e-06, "loss": 2.6235, "step": 4689 }, { "epoch": 1.5985003408316292, "grad_norm": 17.106532332801283, "learning_rate": 5.302235085295871e-06, "loss": 1.7402, "step": 4690 }, { "epoch": 1.5988411724608045, "grad_norm": 18.17030682858321, "learning_rate": 5.300255609512338e-06, "loss": 2.705, "step": 4691 }, { "epoch": 1.5991820040899796, "grad_norm": 16.894671729406653, "learning_rate": 5.298276086497404e-06, "loss": 1.7992, "step": 4692 }, { "epoch": 1.5995228357191547, "grad_norm": 12.681007445466138, "learning_rate": 5.2962965165624545e-06, "loss": 1.9953, "step": 4693 }, { "epoch": 1.59986366734833, "grad_norm": 10.741763659900377, "learning_rate": 5.294316900018883e-06, "loss": 2.1954, "step": 4694 }, { "epoch": 1.6002044989775053, "grad_norm": 15.027571840301112, "learning_rate": 5.292337237178092e-06, "loss": 2.4663, "step": 4695 }, { "epoch": 1.6005453306066801, "grad_norm": 16.59236091297349, "learning_rate": 5.2903575283514885e-06, "loss": 2.7809, "step": 4696 }, { "epoch": 1.6008861622358554, "grad_norm": 33.35124859797374, "learning_rate": 5.28837777385049e-06, "loss": 2.1735, "step": 4697 }, { "epoch": 1.6012269938650308, "grad_norm": 21.708288789890354, "learning_rate": 5.2863979739865216e-06, "loss": 2.1864, "step": 4698 }, { "epoch": 1.6015678254942058, "grad_norm": 14.857351977769206, "learning_rate": 5.284418129071011e-06, "loss": 2.9296, "step": 4699 }, { "epoch": 1.601908657123381, "grad_norm": 14.074763473061298, "learning_rate": 5.282438239415395e-06, "loss": 1.9322, "step": 4700 }, { "epoch": 1.6022494887525562, "grad_norm": 14.481627065090272, "learning_rate": 5.280458305331121e-06, "loss": 1.9119, "step": 4701 }, { "epoch": 1.6025903203817315, "grad_norm": 33.67325225332307, "learning_rate": 5.2784783271296394e-06, "loss": 2.4168, "step": 4702 }, { "epoch": 1.6029311520109066, "grad_norm": 24.132126788456738, "learning_rate": 5.2764983051224084e-06, "loss": 2.4514, "step": 4703 }, { "epoch": 1.6032719836400817, "grad_norm": 37.02300922469965, "learning_rate": 5.274518239620892e-06, "loss": 2.2424, "step": 4704 }, { "epoch": 1.603612815269257, "grad_norm": 18.993172517000364, "learning_rate": 5.272538130936564e-06, "loss": 1.7784, "step": 4705 }, { "epoch": 1.6039536468984323, "grad_norm": 16.241374015988928, "learning_rate": 5.270557979380903e-06, "loss": 1.9841, "step": 4706 }, { "epoch": 1.6042944785276072, "grad_norm": 20.99607196944566, "learning_rate": 5.2685777852653975e-06, "loss": 2.4077, "step": 4707 }, { "epoch": 1.6046353101567825, "grad_norm": 36.59788171488439, "learning_rate": 5.266597548901534e-06, "loss": 2.3502, "step": 4708 }, { "epoch": 1.6049761417859578, "grad_norm": 24.28572392413443, "learning_rate": 5.2646172706008154e-06, "loss": 2.2109, "step": 4709 }, { "epoch": 1.605316973415133, "grad_norm": 25.415939668100158, "learning_rate": 5.262636950674749e-06, "loss": 2.0621, "step": 4710 }, { "epoch": 1.605657805044308, "grad_norm": 13.56751998705234, "learning_rate": 5.260656589434842e-06, "loss": 2.2551, "step": 4711 }, { "epoch": 1.6059986366734833, "grad_norm": 19.900139700215078, "learning_rate": 5.258676187192616e-06, "loss": 2.456, "step": 4712 }, { "epoch": 1.6063394683026586, "grad_norm": 16.221156369863387, "learning_rate": 5.256695744259598e-06, "loss": 2.9659, "step": 4713 }, { "epoch": 1.6066802999318337, "grad_norm": 17.818954171216124, "learning_rate": 5.254715260947316e-06, "loss": 2.2736, "step": 4714 }, { "epoch": 1.6070211315610088, "grad_norm": 19.47428759931484, "learning_rate": 5.252734737567311e-06, "loss": 1.7805, "step": 4715 }, { "epoch": 1.607361963190184, "grad_norm": 21.472074002066456, "learning_rate": 5.250754174431124e-06, "loss": 2.4977, "step": 4716 }, { "epoch": 1.6077027948193592, "grad_norm": 20.971111915184963, "learning_rate": 5.248773571850307e-06, "loss": 2.0834, "step": 4717 }, { "epoch": 1.6080436264485343, "grad_norm": 15.12183463491661, "learning_rate": 5.246792930136419e-06, "loss": 2.3588, "step": 4718 }, { "epoch": 1.6083844580777096, "grad_norm": 15.667656363243958, "learning_rate": 5.24481224960102e-06, "loss": 2.1863, "step": 4719 }, { "epoch": 1.6087252897068849, "grad_norm": 17.900783769826695, "learning_rate": 5.242831530555679e-06, "loss": 2.4117, "step": 4720 }, { "epoch": 1.60906612133606, "grad_norm": 19.178168114243654, "learning_rate": 5.240850773311972e-06, "loss": 2.1345, "step": 4721 }, { "epoch": 1.609406952965235, "grad_norm": 15.34408154091941, "learning_rate": 5.238869978181478e-06, "loss": 2.3254, "step": 4722 }, { "epoch": 1.6097477845944104, "grad_norm": 18.133759110979035, "learning_rate": 5.2368891454757866e-06, "loss": 2.9138, "step": 4723 }, { "epoch": 1.6100886162235857, "grad_norm": 20.243321198268298, "learning_rate": 5.234908275506489e-06, "loss": 2.0969, "step": 4724 }, { "epoch": 1.6104294478527608, "grad_norm": 15.572955209296648, "learning_rate": 5.232927368585186e-06, "loss": 2.2005, "step": 4725 }, { "epoch": 1.6107702794819359, "grad_norm": 14.8809692043145, "learning_rate": 5.23094642502348e-06, "loss": 2.4485, "step": 4726 }, { "epoch": 1.6111111111111112, "grad_norm": 16.777786829735565, "learning_rate": 5.228965445132983e-06, "loss": 2.8474, "step": 4727 }, { "epoch": 1.6114519427402862, "grad_norm": 33.17230873085475, "learning_rate": 5.226984429225309e-06, "loss": 2.2256, "step": 4728 }, { "epoch": 1.6117927743694613, "grad_norm": 19.142008093520378, "learning_rate": 5.225003377612079e-06, "loss": 2.038, "step": 4729 }, { "epoch": 1.6121336059986366, "grad_norm": 24.293424113706873, "learning_rate": 5.223022290604925e-06, "loss": 2.5534, "step": 4730 }, { "epoch": 1.612474437627812, "grad_norm": 13.196059410576522, "learning_rate": 5.2210411685154745e-06, "loss": 2.6308, "step": 4731 }, { "epoch": 1.612815269256987, "grad_norm": 20.6360949933755, "learning_rate": 5.21906001165537e-06, "loss": 2.0029, "step": 4732 }, { "epoch": 1.6131561008861621, "grad_norm": 19.212019041253082, "learning_rate": 5.217078820336253e-06, "loss": 2.4892, "step": 4733 }, { "epoch": 1.6134969325153374, "grad_norm": 17.321423995659565, "learning_rate": 5.215097594869774e-06, "loss": 2.7709, "step": 4734 }, { "epoch": 1.6138377641445127, "grad_norm": 20.38551578703487, "learning_rate": 5.213116335567586e-06, "loss": 2.4508, "step": 4735 }, { "epoch": 1.6141785957736878, "grad_norm": 19.86845065363426, "learning_rate": 5.211135042741351e-06, "loss": 2.3064, "step": 4736 }, { "epoch": 1.614519427402863, "grad_norm": 13.841332929360192, "learning_rate": 5.209153716702733e-06, "loss": 2.1819, "step": 4737 }, { "epoch": 1.6148602590320382, "grad_norm": 18.24039413893503, "learning_rate": 5.207172357763402e-06, "loss": 2.4102, "step": 4738 }, { "epoch": 1.6152010906612133, "grad_norm": 14.881981072411888, "learning_rate": 5.205190966235035e-06, "loss": 2.5261, "step": 4739 }, { "epoch": 1.6155419222903884, "grad_norm": 20.551582022853243, "learning_rate": 5.203209542429312e-06, "loss": 1.6278, "step": 4740 }, { "epoch": 1.6158827539195637, "grad_norm": 13.474641849243861, "learning_rate": 5.20122808665792e-06, "loss": 2.7306, "step": 4741 }, { "epoch": 1.616223585548739, "grad_norm": 18.43494496861583, "learning_rate": 5.199246599232548e-06, "loss": 2.3161, "step": 4742 }, { "epoch": 1.616564417177914, "grad_norm": 16.36862930952595, "learning_rate": 5.197265080464893e-06, "loss": 2.2114, "step": 4743 }, { "epoch": 1.6169052488070892, "grad_norm": 12.164881095641489, "learning_rate": 5.195283530666656e-06, "loss": 2.2103, "step": 4744 }, { "epoch": 1.6172460804362645, "grad_norm": 19.062949925329484, "learning_rate": 5.1933019501495415e-06, "loss": 2.2634, "step": 4745 }, { "epoch": 1.6175869120654398, "grad_norm": 13.967171043749913, "learning_rate": 5.19132033922526e-06, "loss": 2.4176, "step": 4746 }, { "epoch": 1.617927743694615, "grad_norm": 15.521960410970243, "learning_rate": 5.189338698205529e-06, "loss": 2.2158, "step": 4747 }, { "epoch": 1.61826857532379, "grad_norm": 17.011347431662664, "learning_rate": 5.187357027402067e-06, "loss": 2.1872, "step": 4748 }, { "epoch": 1.6186094069529653, "grad_norm": 20.060964160219566, "learning_rate": 5.185375327126599e-06, "loss": 2.2628, "step": 4749 }, { "epoch": 1.6189502385821404, "grad_norm": 32.72687994805059, "learning_rate": 5.183393597690853e-06, "loss": 2.9579, "step": 4750 }, { "epoch": 1.6192910702113155, "grad_norm": 24.785079292444326, "learning_rate": 5.181411839406565e-06, "loss": 2.3804, "step": 4751 }, { "epoch": 1.6196319018404908, "grad_norm": 17.37546739977131, "learning_rate": 5.179430052585471e-06, "loss": 2.47, "step": 4752 }, { "epoch": 1.619972733469666, "grad_norm": 15.909225882420248, "learning_rate": 5.177448237539318e-06, "loss": 1.9115, "step": 4753 }, { "epoch": 1.6203135650988412, "grad_norm": 17.368994538762525, "learning_rate": 5.175466394579849e-06, "loss": 1.8512, "step": 4754 }, { "epoch": 1.6206543967280163, "grad_norm": 15.512697461623949, "learning_rate": 5.173484524018818e-06, "loss": 2.7518, "step": 4755 }, { "epoch": 1.6209952283571916, "grad_norm": 37.44817097395937, "learning_rate": 5.171502626167981e-06, "loss": 2.3608, "step": 4756 }, { "epoch": 1.6213360599863669, "grad_norm": 19.241435576494794, "learning_rate": 5.169520701339098e-06, "loss": 1.745, "step": 4757 }, { "epoch": 1.621676891615542, "grad_norm": 15.731827138648343, "learning_rate": 5.167538749843934e-06, "loss": 2.3703, "step": 4758 }, { "epoch": 1.622017723244717, "grad_norm": 25.469653034956696, "learning_rate": 5.165556771994257e-06, "loss": 2.2461, "step": 4759 }, { "epoch": 1.6223585548738924, "grad_norm": 13.365726569577935, "learning_rate": 5.163574768101841e-06, "loss": 2.3313, "step": 4760 }, { "epoch": 1.6226993865030674, "grad_norm": 15.686460452573593, "learning_rate": 5.161592738478464e-06, "loss": 2.2375, "step": 4761 }, { "epoch": 1.6230402181322425, "grad_norm": 22.673660663741778, "learning_rate": 5.159610683435905e-06, "loss": 2.5936, "step": 4762 }, { "epoch": 1.6233810497614178, "grad_norm": 13.212635133517308, "learning_rate": 5.157628603285949e-06, "loss": 2.3545, "step": 4763 }, { "epoch": 1.6237218813905931, "grad_norm": 12.90440764387199, "learning_rate": 5.155646498340389e-06, "loss": 2.1459, "step": 4764 }, { "epoch": 1.6240627130197682, "grad_norm": 13.298021234710733, "learning_rate": 5.153664368911013e-06, "loss": 2.3657, "step": 4765 }, { "epoch": 1.6244035446489433, "grad_norm": 16.65360138319781, "learning_rate": 5.15168221530962e-06, "loss": 2.2958, "step": 4766 }, { "epoch": 1.6247443762781186, "grad_norm": 16.485407916263572, "learning_rate": 5.149700037848012e-06, "loss": 2.3042, "step": 4767 }, { "epoch": 1.625085207907294, "grad_norm": 15.845620752654689, "learning_rate": 5.147717836837991e-06, "loss": 2.0949, "step": 4768 }, { "epoch": 1.625426039536469, "grad_norm": 19.804021256504576, "learning_rate": 5.145735612591367e-06, "loss": 2.1761, "step": 4769 }, { "epoch": 1.6257668711656441, "grad_norm": 24.754072417566196, "learning_rate": 5.143753365419952e-06, "loss": 2.7028, "step": 4770 }, { "epoch": 1.6261077027948194, "grad_norm": 19.53357521391298, "learning_rate": 5.141771095635558e-06, "loss": 2.6186, "step": 4771 }, { "epoch": 1.6264485344239945, "grad_norm": 23.82577299945505, "learning_rate": 5.139788803550008e-06, "loss": 2.4368, "step": 4772 }, { "epoch": 1.6267893660531696, "grad_norm": 30.264751988445088, "learning_rate": 5.137806489475122e-06, "loss": 2.3501, "step": 4773 }, { "epoch": 1.627130197682345, "grad_norm": 17.12254932437324, "learning_rate": 5.135824153722727e-06, "loss": 2.3244, "step": 4774 }, { "epoch": 1.6274710293115202, "grad_norm": 15.659519394492966, "learning_rate": 5.133841796604651e-06, "loss": 2.7478, "step": 4775 }, { "epoch": 1.6278118609406953, "grad_norm": 15.007658451170252, "learning_rate": 5.131859418432729e-06, "loss": 2.4151, "step": 4776 }, { "epoch": 1.6281526925698704, "grad_norm": 16.940219180002245, "learning_rate": 5.1298770195187945e-06, "loss": 2.4073, "step": 4777 }, { "epoch": 1.6284935241990457, "grad_norm": 20.768181147983828, "learning_rate": 5.12789460017469e-06, "loss": 2.0284, "step": 4778 }, { "epoch": 1.628834355828221, "grad_norm": 12.282241845207508, "learning_rate": 5.1259121607122535e-06, "loss": 2.1163, "step": 4779 }, { "epoch": 1.629175187457396, "grad_norm": 16.62626946948372, "learning_rate": 5.123929701443332e-06, "loss": 2.6893, "step": 4780 }, { "epoch": 1.6295160190865712, "grad_norm": 14.978821886382468, "learning_rate": 5.121947222679777e-06, "loss": 2.4665, "step": 4781 }, { "epoch": 1.6298568507157465, "grad_norm": 12.037444231150701, "learning_rate": 5.119964724733439e-06, "loss": 1.9648, "step": 4782 }, { "epoch": 1.6301976823449216, "grad_norm": 15.013474527442463, "learning_rate": 5.11798220791617e-06, "loss": 1.8592, "step": 4783 }, { "epoch": 1.6305385139740967, "grad_norm": 13.809771764110652, "learning_rate": 5.115999672539832e-06, "loss": 2.0466, "step": 4784 }, { "epoch": 1.630879345603272, "grad_norm": 17.602000552611656, "learning_rate": 5.114017118916282e-06, "loss": 2.5019, "step": 4785 }, { "epoch": 1.6312201772324473, "grad_norm": 13.219674369637023, "learning_rate": 5.112034547357387e-06, "loss": 2.0652, "step": 4786 }, { "epoch": 1.6315610088616224, "grad_norm": 19.54180231760908, "learning_rate": 5.110051958175012e-06, "loss": 2.1537, "step": 4787 }, { "epoch": 1.6319018404907975, "grad_norm": 21.620645232618184, "learning_rate": 5.1080693516810235e-06, "loss": 2.5667, "step": 4788 }, { "epoch": 1.6322426721199728, "grad_norm": 14.595476524823887, "learning_rate": 5.106086728187299e-06, "loss": 2.4769, "step": 4789 }, { "epoch": 1.632583503749148, "grad_norm": 23.228747608346424, "learning_rate": 5.104104088005707e-06, "loss": 2.1188, "step": 4790 }, { "epoch": 1.6329243353783232, "grad_norm": 23.10978846429566, "learning_rate": 5.102121431448128e-06, "loss": 2.699, "step": 4791 }, { "epoch": 1.6332651670074982, "grad_norm": 15.725475298502737, "learning_rate": 5.100138758826443e-06, "loss": 1.7101, "step": 4792 }, { "epoch": 1.6336059986366736, "grad_norm": 17.439045581009527, "learning_rate": 5.09815607045253e-06, "loss": 2.1544, "step": 4793 }, { "epoch": 1.6339468302658486, "grad_norm": 18.260646434981513, "learning_rate": 5.096173366638278e-06, "loss": 1.599, "step": 4794 }, { "epoch": 1.6342876618950237, "grad_norm": 15.470465157883865, "learning_rate": 5.094190647695574e-06, "loss": 2.0468, "step": 4795 }, { "epoch": 1.634628493524199, "grad_norm": 11.945594353890595, "learning_rate": 5.092207913936305e-06, "loss": 1.7252, "step": 4796 }, { "epoch": 1.6349693251533743, "grad_norm": 16.725441609671837, "learning_rate": 5.090225165672363e-06, "loss": 2.1198, "step": 4797 }, { "epoch": 1.6353101567825494, "grad_norm": 22.729403300419904, "learning_rate": 5.088242403215644e-06, "loss": 2.2904, "step": 4798 }, { "epoch": 1.6356509884117245, "grad_norm": 15.897247407412719, "learning_rate": 5.086259626878044e-06, "loss": 2.122, "step": 4799 }, { "epoch": 1.6359918200408998, "grad_norm": 20.711543053587665, "learning_rate": 5.08427683697146e-06, "loss": 2.9403, "step": 4800 }, { "epoch": 1.6363326516700751, "grad_norm": 22.522529841669037, "learning_rate": 5.082294033807795e-06, "loss": 2.6746, "step": 4801 }, { "epoch": 1.6366734832992502, "grad_norm": 22.101987064961193, "learning_rate": 5.080311217698951e-06, "loss": 2.4564, "step": 4802 }, { "epoch": 1.6370143149284253, "grad_norm": 32.90558934216906, "learning_rate": 5.078328388956833e-06, "loss": 1.9571, "step": 4803 }, { "epoch": 1.6373551465576006, "grad_norm": 19.14793507257964, "learning_rate": 5.076345547893348e-06, "loss": 2.1909, "step": 4804 }, { "epoch": 1.6376959781867757, "grad_norm": 18.352457655197064, "learning_rate": 5.074362694820403e-06, "loss": 2.0345, "step": 4805 }, { "epoch": 1.6380368098159508, "grad_norm": 15.089267865227239, "learning_rate": 5.072379830049911e-06, "loss": 2.1916, "step": 4806 }, { "epoch": 1.638377641445126, "grad_norm": 23.966183511737448, "learning_rate": 5.070396953893783e-06, "loss": 1.9454, "step": 4807 }, { "epoch": 1.6387184730743014, "grad_norm": 28.210483462763236, "learning_rate": 5.0684140666639345e-06, "loss": 2.1839, "step": 4808 }, { "epoch": 1.6390593047034765, "grad_norm": 24.737500107936814, "learning_rate": 5.0664311686722815e-06, "loss": 2.0775, "step": 4809 }, { "epoch": 1.6394001363326516, "grad_norm": 19.993980958664725, "learning_rate": 5.06444826023074e-06, "loss": 2.6643, "step": 4810 }, { "epoch": 1.639740967961827, "grad_norm": 14.833239156404161, "learning_rate": 5.062465341651232e-06, "loss": 2.2405, "step": 4811 }, { "epoch": 1.6400817995910022, "grad_norm": 15.821212108749355, "learning_rate": 5.0604824132456765e-06, "loss": 2.1622, "step": 4812 }, { "epoch": 1.6404226312201773, "grad_norm": 23.560491151451604, "learning_rate": 5.0584994753259966e-06, "loss": 2.3103, "step": 4813 }, { "epoch": 1.6407634628493524, "grad_norm": 16.852690996721776, "learning_rate": 5.056516528204114e-06, "loss": 2.3291, "step": 4814 }, { "epoch": 1.6411042944785277, "grad_norm": 24.007437716883278, "learning_rate": 5.0545335721919596e-06, "loss": 2.849, "step": 4815 }, { "epoch": 1.6414451261077028, "grad_norm": 24.396171592605988, "learning_rate": 5.052550607601457e-06, "loss": 2.054, "step": 4816 }, { "epoch": 1.6417859577368779, "grad_norm": 15.383905501698818, "learning_rate": 5.050567634744533e-06, "loss": 2.5014, "step": 4817 }, { "epoch": 1.6421267893660532, "grad_norm": 16.73178780697449, "learning_rate": 5.04858465393312e-06, "loss": 2.1528, "step": 4818 }, { "epoch": 1.6424676209952285, "grad_norm": 16.02832814173762, "learning_rate": 5.046601665479148e-06, "loss": 2.1172, "step": 4819 }, { "epoch": 1.6428084526244036, "grad_norm": 19.603000541777494, "learning_rate": 5.044618669694547e-06, "loss": 2.5521, "step": 4820 }, { "epoch": 1.6431492842535786, "grad_norm": 13.568436781013848, "learning_rate": 5.0426356668912525e-06, "loss": 2.435, "step": 4821 }, { "epoch": 1.643490115882754, "grad_norm": 14.93610055903479, "learning_rate": 5.0406526573811966e-06, "loss": 2.1941, "step": 4822 }, { "epoch": 1.6438309475119293, "grad_norm": 23.322803826595266, "learning_rate": 5.038669641476318e-06, "loss": 1.9353, "step": 4823 }, { "epoch": 1.6441717791411041, "grad_norm": 16.156614640526254, "learning_rate": 5.03668661948855e-06, "loss": 2.4958, "step": 4824 }, { "epoch": 1.6445126107702794, "grad_norm": 25.82073162175326, "learning_rate": 5.03470359172983e-06, "loss": 3.171, "step": 4825 }, { "epoch": 1.6448534423994547, "grad_norm": 30.310682296511363, "learning_rate": 5.032720558512098e-06, "loss": 2.5145, "step": 4826 }, { "epoch": 1.6451942740286298, "grad_norm": 15.585772932654487, "learning_rate": 5.030737520147291e-06, "loss": 2.2444, "step": 4827 }, { "epoch": 1.645535105657805, "grad_norm": 14.101216544507487, "learning_rate": 5.028754476947349e-06, "loss": 2.4341, "step": 4828 }, { "epoch": 1.6458759372869802, "grad_norm": 18.895827940972733, "learning_rate": 5.026771429224217e-06, "loss": 2.3236, "step": 4829 }, { "epoch": 1.6462167689161555, "grad_norm": 17.835604882777584, "learning_rate": 5.024788377289828e-06, "loss": 1.96, "step": 4830 }, { "epoch": 1.6465576005453306, "grad_norm": 17.673852774192852, "learning_rate": 5.02280532145613e-06, "loss": 2.7025, "step": 4831 }, { "epoch": 1.6468984321745057, "grad_norm": 15.741174190530963, "learning_rate": 5.020822262035065e-06, "loss": 2.1089, "step": 4832 }, { "epoch": 1.647239263803681, "grad_norm": 17.225769688678888, "learning_rate": 5.018839199338576e-06, "loss": 1.8117, "step": 4833 }, { "epoch": 1.6475800954328563, "grad_norm": 24.384903442323818, "learning_rate": 5.016856133678605e-06, "loss": 2.4974, "step": 4834 }, { "epoch": 1.6479209270620312, "grad_norm": 22.484074069174945, "learning_rate": 5.0148730653670965e-06, "loss": 1.9383, "step": 4835 }, { "epoch": 1.6482617586912065, "grad_norm": 31.134001950738526, "learning_rate": 5.012889994715996e-06, "loss": 2.5408, "step": 4836 }, { "epoch": 1.6486025903203818, "grad_norm": 21.10057525019606, "learning_rate": 5.01090692203725e-06, "loss": 2.4163, "step": 4837 }, { "epoch": 1.648943421949557, "grad_norm": 15.171681661219187, "learning_rate": 5.008923847642799e-06, "loss": 1.9838, "step": 4838 }, { "epoch": 1.649284253578732, "grad_norm": 22.634330841862422, "learning_rate": 5.006940771844591e-06, "loss": 2.2104, "step": 4839 }, { "epoch": 1.6496250852079073, "grad_norm": 23.89552132949375, "learning_rate": 5.004957694954574e-06, "loss": 1.7793, "step": 4840 }, { "epoch": 1.6499659168370826, "grad_norm": 15.868331440515032, "learning_rate": 5.00297461728469e-06, "loss": 2.1969, "step": 4841 }, { "epoch": 1.6503067484662577, "grad_norm": 30.840714463703495, "learning_rate": 5.000991539146889e-06, "loss": 1.8217, "step": 4842 }, { "epoch": 1.6506475800954328, "grad_norm": 11.655584201273205, "learning_rate": 4.9990084608531135e-06, "loss": 1.8678, "step": 4843 }, { "epoch": 1.650988411724608, "grad_norm": 17.745952633838357, "learning_rate": 4.9970253827153106e-06, "loss": 2.3801, "step": 4844 }, { "epoch": 1.6513292433537834, "grad_norm": 13.256273736349959, "learning_rate": 4.995042305045429e-06, "loss": 2.1993, "step": 4845 }, { "epoch": 1.6516700749829583, "grad_norm": 17.24608164002074, "learning_rate": 4.99305922815541e-06, "loss": 2.0378, "step": 4846 }, { "epoch": 1.6520109066121336, "grad_norm": 18.912909329330212, "learning_rate": 4.9910761523572025e-06, "loss": 1.8693, "step": 4847 }, { "epoch": 1.6523517382413089, "grad_norm": 13.849034057001004, "learning_rate": 4.989093077962753e-06, "loss": 1.8478, "step": 4848 }, { "epoch": 1.652692569870484, "grad_norm": 17.74369819876824, "learning_rate": 4.987110005284004e-06, "loss": 2.4935, "step": 4849 }, { "epoch": 1.653033401499659, "grad_norm": 15.250830906937846, "learning_rate": 4.985126934632905e-06, "loss": 2.4647, "step": 4850 }, { "epoch": 1.6533742331288344, "grad_norm": 15.632411789692027, "learning_rate": 4.983143866321397e-06, "loss": 1.8764, "step": 4851 }, { "epoch": 1.6537150647580097, "grad_norm": 22.09949710095985, "learning_rate": 4.981160800661424e-06, "loss": 2.0234, "step": 4852 }, { "epoch": 1.6540558963871848, "grad_norm": 15.61977871313864, "learning_rate": 4.979177737964935e-06, "loss": 2.1148, "step": 4853 }, { "epoch": 1.6543967280163598, "grad_norm": 13.089205702701424, "learning_rate": 4.977194678543871e-06, "loss": 2.0548, "step": 4854 }, { "epoch": 1.6547375596455351, "grad_norm": 23.514249481844537, "learning_rate": 4.975211622710173e-06, "loss": 1.567, "step": 4855 }, { "epoch": 1.6550783912747105, "grad_norm": 17.69959200609305, "learning_rate": 4.973228570775786e-06, "loss": 2.813, "step": 4856 }, { "epoch": 1.6554192229038853, "grad_norm": 17.352051255263866, "learning_rate": 4.9712455230526516e-06, "loss": 2.2817, "step": 4857 }, { "epoch": 1.6557600545330606, "grad_norm": 20.436005830405563, "learning_rate": 4.96926247985271e-06, "loss": 2.1565, "step": 4858 }, { "epoch": 1.656100886162236, "grad_norm": 20.400297859309894, "learning_rate": 4.967279441487904e-06, "loss": 3.1033, "step": 4859 }, { "epoch": 1.656441717791411, "grad_norm": 21.393142278212867, "learning_rate": 4.965296408270171e-06, "loss": 2.2849, "step": 4860 }, { "epoch": 1.6567825494205861, "grad_norm": 14.433747193069165, "learning_rate": 4.963313380511452e-06, "loss": 1.9965, "step": 4861 }, { "epoch": 1.6571233810497614, "grad_norm": 26.98246370705561, "learning_rate": 4.961330358523684e-06, "loss": 2.3008, "step": 4862 }, { "epoch": 1.6574642126789367, "grad_norm": 24.332708900590536, "learning_rate": 4.959347342618804e-06, "loss": 3.3244, "step": 4863 }, { "epoch": 1.6578050443081118, "grad_norm": 16.542521763664105, "learning_rate": 4.957364333108749e-06, "loss": 2.402, "step": 4864 }, { "epoch": 1.658145875937287, "grad_norm": 17.46628913074591, "learning_rate": 4.9553813303054535e-06, "loss": 1.4389, "step": 4865 }, { "epoch": 1.6584867075664622, "grad_norm": 18.656604709073857, "learning_rate": 4.953398334520855e-06, "loss": 2.146, "step": 4866 }, { "epoch": 1.6588275391956373, "grad_norm": 11.509538978170022, "learning_rate": 4.951415346066883e-06, "loss": 2.1461, "step": 4867 }, { "epoch": 1.6591683708248124, "grad_norm": 17.168822705537114, "learning_rate": 4.9494323652554685e-06, "loss": 2.1975, "step": 4868 }, { "epoch": 1.6595092024539877, "grad_norm": 23.197580602784573, "learning_rate": 4.947449392398545e-06, "loss": 1.3506, "step": 4869 }, { "epoch": 1.659850034083163, "grad_norm": 15.456505731961155, "learning_rate": 4.945466427808043e-06, "loss": 2.1847, "step": 4870 }, { "epoch": 1.660190865712338, "grad_norm": 21.809131500036912, "learning_rate": 4.943483471795887e-06, "loss": 1.7611, "step": 4871 }, { "epoch": 1.6605316973415132, "grad_norm": 12.523364374652981, "learning_rate": 4.941500524674006e-06, "loss": 1.8492, "step": 4872 }, { "epoch": 1.6608725289706885, "grad_norm": 16.024018531676717, "learning_rate": 4.939517586754326e-06, "loss": 2.1231, "step": 4873 }, { "epoch": 1.6612133605998638, "grad_norm": 16.975222806112605, "learning_rate": 4.937534658348769e-06, "loss": 2.1385, "step": 4874 }, { "epoch": 1.6615541922290389, "grad_norm": 15.541730796333379, "learning_rate": 4.935551739769261e-06, "loss": 2.2677, "step": 4875 }, { "epoch": 1.661895023858214, "grad_norm": 14.048063722393826, "learning_rate": 4.93356883132772e-06, "loss": 2.2127, "step": 4876 }, { "epoch": 1.6622358554873893, "grad_norm": 11.877094847879551, "learning_rate": 4.9315859333360654e-06, "loss": 1.9457, "step": 4877 }, { "epoch": 1.6625766871165644, "grad_norm": 49.192800440008504, "learning_rate": 4.9296030461062175e-06, "loss": 2.2148, "step": 4878 }, { "epoch": 1.6629175187457395, "grad_norm": 17.765849953388358, "learning_rate": 4.927620169950091e-06, "loss": 1.7352, "step": 4879 }, { "epoch": 1.6632583503749148, "grad_norm": 12.929007551086661, "learning_rate": 4.925637305179597e-06, "loss": 2.176, "step": 4880 }, { "epoch": 1.66359918200409, "grad_norm": 15.967093257551834, "learning_rate": 4.923654452106653e-06, "loss": 2.2764, "step": 4881 }, { "epoch": 1.6639400136332652, "grad_norm": 23.99230646759936, "learning_rate": 4.9216716110431685e-06, "loss": 2.6787, "step": 4882 }, { "epoch": 1.6642808452624402, "grad_norm": 17.7134454644471, "learning_rate": 4.919688782301049e-06, "loss": 2.0142, "step": 4883 }, { "epoch": 1.6646216768916156, "grad_norm": 22.315477637627293, "learning_rate": 4.917705966192206e-06, "loss": 2.2634, "step": 4884 }, { "epoch": 1.6649625085207909, "grad_norm": 31.273217038731897, "learning_rate": 4.915723163028541e-06, "loss": 1.419, "step": 4885 }, { "epoch": 1.665303340149966, "grad_norm": 13.477989815617509, "learning_rate": 4.9137403731219565e-06, "loss": 2.0024, "step": 4886 }, { "epoch": 1.665644171779141, "grad_norm": 20.655423807096867, "learning_rate": 4.911757596784358e-06, "loss": 1.5946, "step": 4887 }, { "epoch": 1.6659850034083163, "grad_norm": 14.724358176768634, "learning_rate": 4.90977483432764e-06, "loss": 1.7564, "step": 4888 }, { "epoch": 1.6663258350374914, "grad_norm": 18.843859304057467, "learning_rate": 4.907792086063697e-06, "loss": 2.1621, "step": 4889 }, { "epoch": 1.6666666666666665, "grad_norm": 15.310754677905447, "learning_rate": 4.905809352304428e-06, "loss": 2.4078, "step": 4890 }, { "epoch": 1.6670074982958418, "grad_norm": 15.969084549118236, "learning_rate": 4.903826633361723e-06, "loss": 2.5587, "step": 4891 }, { "epoch": 1.6673483299250171, "grad_norm": 16.978205620895732, "learning_rate": 4.901843929547472e-06, "loss": 2.4837, "step": 4892 }, { "epoch": 1.6676891615541922, "grad_norm": 15.272938264457052, "learning_rate": 4.899861241173559e-06, "loss": 1.775, "step": 4893 }, { "epoch": 1.6680299931833673, "grad_norm": 16.264205845074425, "learning_rate": 4.897878568551873e-06, "loss": 2.1323, "step": 4894 }, { "epoch": 1.6683708248125426, "grad_norm": 19.52549980377032, "learning_rate": 4.895895911994296e-06, "loss": 2.5831, "step": 4895 }, { "epoch": 1.668711656441718, "grad_norm": 14.199883066482922, "learning_rate": 4.893913271812704e-06, "loss": 2.3024, "step": 4896 }, { "epoch": 1.669052488070893, "grad_norm": 20.4515055655972, "learning_rate": 4.891930648318978e-06, "loss": 2.4019, "step": 4897 }, { "epoch": 1.669393319700068, "grad_norm": 14.167507544485932, "learning_rate": 4.88994804182499e-06, "loss": 2.2696, "step": 4898 }, { "epoch": 1.6697341513292434, "grad_norm": 27.947327597860966, "learning_rate": 4.8879654526426135e-06, "loss": 2.5619, "step": 4899 }, { "epoch": 1.6700749829584185, "grad_norm": 15.295747060212172, "learning_rate": 4.885982881083719e-06, "loss": 2.163, "step": 4900 }, { "epoch": 1.6704158145875936, "grad_norm": 35.158550609064946, "learning_rate": 4.88400032746017e-06, "loss": 2.7459, "step": 4901 }, { "epoch": 1.670756646216769, "grad_norm": 29.894273263985298, "learning_rate": 4.882017792083831e-06, "loss": 2.5929, "step": 4902 }, { "epoch": 1.6710974778459442, "grad_norm": 15.93299947517147, "learning_rate": 4.880035275266563e-06, "loss": 2.3599, "step": 4903 }, { "epoch": 1.6714383094751193, "grad_norm": 16.075908713881255, "learning_rate": 4.878052777320225e-06, "loss": 2.4784, "step": 4904 }, { "epoch": 1.6717791411042944, "grad_norm": 13.742616981462417, "learning_rate": 4.876070298556669e-06, "loss": 1.8688, "step": 4905 }, { "epoch": 1.6721199727334697, "grad_norm": 18.26330189049245, "learning_rate": 4.874087839287748e-06, "loss": 2.3571, "step": 4906 }, { "epoch": 1.672460804362645, "grad_norm": 20.842041067755485, "learning_rate": 4.8721053998253134e-06, "loss": 3.1098, "step": 4907 }, { "epoch": 1.67280163599182, "grad_norm": 14.76931700122803, "learning_rate": 4.8701229804812055e-06, "loss": 1.9781, "step": 4908 }, { "epoch": 1.6731424676209952, "grad_norm": 23.298843285178013, "learning_rate": 4.868140581567273e-06, "loss": 2.2187, "step": 4909 }, { "epoch": 1.6734832992501705, "grad_norm": 13.559466353436344, "learning_rate": 4.8661582033953494e-06, "loss": 1.8774, "step": 4910 }, { "epoch": 1.6738241308793456, "grad_norm": 29.85295285903798, "learning_rate": 4.8641758462772735e-06, "loss": 2.2068, "step": 4911 }, { "epoch": 1.6741649625085206, "grad_norm": 15.342913604091422, "learning_rate": 4.862193510524879e-06, "loss": 2.3586, "step": 4912 }, { "epoch": 1.674505794137696, "grad_norm": 16.76933419345027, "learning_rate": 4.860211196449994e-06, "loss": 2.0511, "step": 4913 }, { "epoch": 1.6748466257668713, "grad_norm": 28.02869085349051, "learning_rate": 4.858228904364443e-06, "loss": 2.4141, "step": 4914 }, { "epoch": 1.6751874573960464, "grad_norm": 17.689455764666764, "learning_rate": 4.85624663458005e-06, "loss": 2.5473, "step": 4915 }, { "epoch": 1.6755282890252214, "grad_norm": 26.303027360194918, "learning_rate": 4.854264387408634e-06, "loss": 2.1478, "step": 4916 }, { "epoch": 1.6758691206543967, "grad_norm": 12.569721863256435, "learning_rate": 4.8522821631620085e-06, "loss": 2.3867, "step": 4917 }, { "epoch": 1.676209952283572, "grad_norm": 14.926095315457456, "learning_rate": 4.8502999621519895e-06, "loss": 2.4966, "step": 4918 }, { "epoch": 1.6765507839127471, "grad_norm": 16.883433173491202, "learning_rate": 4.848317784690381e-06, "loss": 2.8784, "step": 4919 }, { "epoch": 1.6768916155419222, "grad_norm": 20.769540457335854, "learning_rate": 4.8463356310889874e-06, "loss": 2.2929, "step": 4920 }, { "epoch": 1.6772324471710975, "grad_norm": 14.65173236100568, "learning_rate": 4.844353501659613e-06, "loss": 2.1918, "step": 4921 }, { "epoch": 1.6775732788002726, "grad_norm": 14.57057199930505, "learning_rate": 4.842371396714053e-06, "loss": 2.2541, "step": 4922 }, { "epoch": 1.6779141104294477, "grad_norm": 13.404638649321324, "learning_rate": 4.840389316564096e-06, "loss": 2.5274, "step": 4923 }, { "epoch": 1.678254942058623, "grad_norm": 20.777193294147885, "learning_rate": 4.838407261521537e-06, "loss": 2.61, "step": 4924 }, { "epoch": 1.6785957736877983, "grad_norm": 15.855991366823298, "learning_rate": 4.83642523189816e-06, "loss": 2.5258, "step": 4925 }, { "epoch": 1.6789366053169734, "grad_norm": 19.642929434680763, "learning_rate": 4.834443228005746e-06, "loss": 2.6445, "step": 4926 }, { "epoch": 1.6792774369461485, "grad_norm": 10.675065410072174, "learning_rate": 4.832461250156068e-06, "loss": 1.8268, "step": 4927 }, { "epoch": 1.6796182685753238, "grad_norm": 15.263062645879616, "learning_rate": 4.830479298660904e-06, "loss": 2.2715, "step": 4928 }, { "epoch": 1.6799591002044991, "grad_norm": 24.402358715476627, "learning_rate": 4.828497373832022e-06, "loss": 1.9165, "step": 4929 }, { "epoch": 1.6802999318336742, "grad_norm": 13.881597047055982, "learning_rate": 4.826515475981183e-06, "loss": 2.2159, "step": 4930 }, { "epoch": 1.6806407634628493, "grad_norm": 22.106003916923964, "learning_rate": 4.824533605420152e-06, "loss": 2.1401, "step": 4931 }, { "epoch": 1.6809815950920246, "grad_norm": 13.382691782157409, "learning_rate": 4.822551762460685e-06, "loss": 1.8347, "step": 4932 }, { "epoch": 1.6813224267211997, "grad_norm": 17.2861720002287, "learning_rate": 4.820569947414529e-06, "loss": 2.2032, "step": 4933 }, { "epoch": 1.6816632583503748, "grad_norm": 17.890966198420244, "learning_rate": 4.818588160593437e-06, "loss": 2.076, "step": 4934 }, { "epoch": 1.68200408997955, "grad_norm": 21.049524315489254, "learning_rate": 4.816606402309148e-06, "loss": 2.2251, "step": 4935 }, { "epoch": 1.6823449216087254, "grad_norm": 15.818198016967274, "learning_rate": 4.814624672873401e-06, "loss": 1.809, "step": 4936 }, { "epoch": 1.6826857532379005, "grad_norm": 13.797913196634136, "learning_rate": 4.812642972597934e-06, "loss": 2.0723, "step": 4937 }, { "epoch": 1.6830265848670756, "grad_norm": 15.487075623181802, "learning_rate": 4.8106613017944725e-06, "loss": 1.7367, "step": 4938 }, { "epoch": 1.6833674164962509, "grad_norm": 14.990970216205517, "learning_rate": 4.80867966077474e-06, "loss": 2.6722, "step": 4939 }, { "epoch": 1.6837082481254262, "grad_norm": 17.345450390318447, "learning_rate": 4.806698049850459e-06, "loss": 2.1805, "step": 4940 }, { "epoch": 1.6840490797546013, "grad_norm": 15.835540681307156, "learning_rate": 4.804716469333347e-06, "loss": 2.2509, "step": 4941 }, { "epoch": 1.6843899113837764, "grad_norm": 18.751722228196975, "learning_rate": 4.802734919535108e-06, "loss": 2.594, "step": 4942 }, { "epoch": 1.6847307430129517, "grad_norm": 26.41975549366376, "learning_rate": 4.8007534007674536e-06, "loss": 1.7505, "step": 4943 }, { "epoch": 1.6850715746421268, "grad_norm": 16.01807754570169, "learning_rate": 4.798771913342082e-06, "loss": 2.241, "step": 4944 }, { "epoch": 1.6854124062713018, "grad_norm": 14.544618701257948, "learning_rate": 4.796790457570688e-06, "loss": 2.0941, "step": 4945 }, { "epoch": 1.6857532379004772, "grad_norm": 20.67356058323263, "learning_rate": 4.794809033764966e-06, "loss": 2.8077, "step": 4946 }, { "epoch": 1.6860940695296525, "grad_norm": 12.96968645630812, "learning_rate": 4.7928276422366e-06, "loss": 2.3481, "step": 4947 }, { "epoch": 1.6864349011588275, "grad_norm": 17.324300850863715, "learning_rate": 4.790846283297268e-06, "loss": 2.6171, "step": 4948 }, { "epoch": 1.6867757327880026, "grad_norm": 12.510058045961754, "learning_rate": 4.788864957258651e-06, "loss": 2.2917, "step": 4949 }, { "epoch": 1.687116564417178, "grad_norm": 12.173541806070626, "learning_rate": 4.786883664432415e-06, "loss": 1.9895, "step": 4950 }, { "epoch": 1.6874573960463533, "grad_norm": 19.065045230317388, "learning_rate": 4.784902405130227e-06, "loss": 2.2342, "step": 4951 }, { "epoch": 1.6877982276755283, "grad_norm": 16.725864813280253, "learning_rate": 4.782921179663749e-06, "loss": 2.2417, "step": 4952 }, { "epoch": 1.6881390593047034, "grad_norm": 14.96871736121806, "learning_rate": 4.7809399883446315e-06, "loss": 2.708, "step": 4953 }, { "epoch": 1.6884798909338787, "grad_norm": 16.13075201561084, "learning_rate": 4.778958831484525e-06, "loss": 2.1985, "step": 4954 }, { "epoch": 1.6888207225630538, "grad_norm": 16.394742029215042, "learning_rate": 4.776977709395076e-06, "loss": 1.9046, "step": 4955 }, { "epoch": 1.689161554192229, "grad_norm": 18.456901939349812, "learning_rate": 4.774996622387922e-06, "loss": 2.6707, "step": 4956 }, { "epoch": 1.6895023858214042, "grad_norm": 13.803279661159113, "learning_rate": 4.773015570774694e-06, "loss": 2.5971, "step": 4957 }, { "epoch": 1.6898432174505795, "grad_norm": 16.893487807995882, "learning_rate": 4.771034554867018e-06, "loss": 1.7319, "step": 4958 }, { "epoch": 1.6901840490797546, "grad_norm": 15.23408464182785, "learning_rate": 4.7690535749765216e-06, "loss": 2.0159, "step": 4959 }, { "epoch": 1.6905248807089297, "grad_norm": 26.036005707837028, "learning_rate": 4.767072631414815e-06, "loss": 2.3289, "step": 4960 }, { "epoch": 1.690865712338105, "grad_norm": 17.77381576271729, "learning_rate": 4.7650917244935114e-06, "loss": 2.6536, "step": 4961 }, { "epoch": 1.6912065439672803, "grad_norm": 15.955281095686487, "learning_rate": 4.763110854524214e-06, "loss": 2.1367, "step": 4962 }, { "epoch": 1.6915473755964554, "grad_norm": 18.95732938108048, "learning_rate": 4.761130021818524e-06, "loss": 2.116, "step": 4963 }, { "epoch": 1.6918882072256305, "grad_norm": 17.18260962579657, "learning_rate": 4.75914922668803e-06, "loss": 2.9141, "step": 4964 }, { "epoch": 1.6922290388548058, "grad_norm": 23.765438512231132, "learning_rate": 4.757168469444323e-06, "loss": 2.5122, "step": 4965 }, { "epoch": 1.6925698704839809, "grad_norm": 27.221711048126924, "learning_rate": 4.755187750398983e-06, "loss": 1.6651, "step": 4966 }, { "epoch": 1.692910702113156, "grad_norm": 17.26529528283423, "learning_rate": 4.753207069863582e-06, "loss": 2.8065, "step": 4967 }, { "epoch": 1.6932515337423313, "grad_norm": 14.766002676509075, "learning_rate": 4.751226428149694e-06, "loss": 2.0708, "step": 4968 }, { "epoch": 1.6935923653715066, "grad_norm": 14.490491003803482, "learning_rate": 4.749245825568877e-06, "loss": 1.8389, "step": 4969 }, { "epoch": 1.6939331970006817, "grad_norm": 16.311430643856674, "learning_rate": 4.74726526243269e-06, "loss": 2.2167, "step": 4970 }, { "epoch": 1.6942740286298568, "grad_norm": 20.51224267725315, "learning_rate": 4.7452847390526845e-06, "loss": 2.4753, "step": 4971 }, { "epoch": 1.694614860259032, "grad_norm": 15.998872104301114, "learning_rate": 4.743304255740404e-06, "loss": 2.3924, "step": 4972 }, { "epoch": 1.6949556918882074, "grad_norm": 21.969704044336932, "learning_rate": 4.741323812807384e-06, "loss": 2.1038, "step": 4973 }, { "epoch": 1.6952965235173822, "grad_norm": 20.857320622433484, "learning_rate": 4.739343410565158e-06, "loss": 2.6294, "step": 4974 }, { "epoch": 1.6956373551465576, "grad_norm": 15.33479390703286, "learning_rate": 4.737363049325254e-06, "loss": 2.1678, "step": 4975 }, { "epoch": 1.6959781867757329, "grad_norm": 15.779572425444512, "learning_rate": 4.7353827293991845e-06, "loss": 2.3927, "step": 4976 }, { "epoch": 1.696319018404908, "grad_norm": 17.975629766021278, "learning_rate": 4.7334024510984675e-06, "loss": 1.9814, "step": 4977 }, { "epoch": 1.696659850034083, "grad_norm": 18.992852492961397, "learning_rate": 4.731422214734605e-06, "loss": 2.3055, "step": 4978 }, { "epoch": 1.6970006816632583, "grad_norm": 17.111598955194136, "learning_rate": 4.729442020619096e-06, "loss": 1.9773, "step": 4979 }, { "epoch": 1.6973415132924337, "grad_norm": 12.678859464766719, "learning_rate": 4.7274618690634365e-06, "loss": 2.8256, "step": 4980 }, { "epoch": 1.6976823449216087, "grad_norm": 15.496882671231207, "learning_rate": 4.725481760379111e-06, "loss": 2.8654, "step": 4981 }, { "epoch": 1.6980231765507838, "grad_norm": 14.312427312094934, "learning_rate": 4.723501694877593e-06, "loss": 2.6084, "step": 4982 }, { "epoch": 1.6983640081799591, "grad_norm": 14.61087637856195, "learning_rate": 4.721521672870361e-06, "loss": 2.4197, "step": 4983 }, { "epoch": 1.6987048398091344, "grad_norm": 31.190890536501406, "learning_rate": 4.719541694668881e-06, "loss": 2.3004, "step": 4984 }, { "epoch": 1.6990456714383093, "grad_norm": 16.89284468185952, "learning_rate": 4.717561760584605e-06, "loss": 1.7995, "step": 4985 }, { "epoch": 1.6993865030674846, "grad_norm": 13.264087949690238, "learning_rate": 4.715581870928992e-06, "loss": 2.2271, "step": 4986 }, { "epoch": 1.69972733469666, "grad_norm": 24.090322489434865, "learning_rate": 4.71360202601348e-06, "loss": 2.3944, "step": 4987 }, { "epoch": 1.700068166325835, "grad_norm": 12.281494142033734, "learning_rate": 4.71162222614951e-06, "loss": 2.0376, "step": 4988 }, { "epoch": 1.70040899795501, "grad_norm": 19.42608334168245, "learning_rate": 4.709642471648513e-06, "loss": 2.4892, "step": 4989 }, { "epoch": 1.7007498295841854, "grad_norm": 13.840633157436393, "learning_rate": 4.707662762821909e-06, "loss": 2.0881, "step": 4990 }, { "epoch": 1.7010906612133607, "grad_norm": 17.785708219836415, "learning_rate": 4.7056830999811194e-06, "loss": 1.8074, "step": 4991 }, { "epoch": 1.7014314928425358, "grad_norm": 12.881354906407255, "learning_rate": 4.703703483437546e-06, "loss": 2.0515, "step": 4992 }, { "epoch": 1.701772324471711, "grad_norm": 18.704130560543963, "learning_rate": 4.701723913502597e-06, "loss": 1.7811, "step": 4993 }, { "epoch": 1.7021131561008862, "grad_norm": 16.111017852117204, "learning_rate": 4.699744390487663e-06, "loss": 2.4018, "step": 4994 }, { "epoch": 1.7024539877300615, "grad_norm": 24.382318597748345, "learning_rate": 4.69776491470413e-06, "loss": 2.2085, "step": 4995 }, { "epoch": 1.7027948193592364, "grad_norm": 19.045664746262155, "learning_rate": 4.695785486463381e-06, "loss": 1.8228, "step": 4996 }, { "epoch": 1.7031356509884117, "grad_norm": 19.9124084507416, "learning_rate": 4.693806106076787e-06, "loss": 2.9119, "step": 4997 }, { "epoch": 1.703476482617587, "grad_norm": 16.318265263110035, "learning_rate": 4.691826773855708e-06, "loss": 2.4178, "step": 4998 }, { "epoch": 1.703817314246762, "grad_norm": 15.000251074367135, "learning_rate": 4.6898474901115065e-06, "loss": 2.6046, "step": 4999 }, { "epoch": 1.7041581458759372, "grad_norm": 15.794854278668598, "learning_rate": 4.68786825515553e-06, "loss": 1.9026, "step": 5000 }, { "epoch": 1.7044989775051125, "grad_norm": 14.811668104385472, "learning_rate": 4.685889069299117e-06, "loss": 2.115, "step": 5001 }, { "epoch": 1.7048398091342878, "grad_norm": 23.465623936253554, "learning_rate": 4.683909932853606e-06, "loss": 2.4674, "step": 5002 }, { "epoch": 1.7051806407634629, "grad_norm": 14.274206156385405, "learning_rate": 4.6819308461303206e-06, "loss": 2.7018, "step": 5003 }, { "epoch": 1.705521472392638, "grad_norm": 14.571891207388248, "learning_rate": 4.6799518094405775e-06, "loss": 2.3773, "step": 5004 }, { "epoch": 1.7058623040218133, "grad_norm": 30.605502195564902, "learning_rate": 4.67797282309569e-06, "loss": 1.8841, "step": 5005 }, { "epoch": 1.7062031356509886, "grad_norm": 18.595630235523746, "learning_rate": 4.675993887406961e-06, "loss": 2.4681, "step": 5006 }, { "epoch": 1.7065439672801634, "grad_norm": 18.71228450597498, "learning_rate": 4.6740150026856804e-06, "loss": 2.4879, "step": 5007 }, { "epoch": 1.7068847989093388, "grad_norm": 17.00728349067932, "learning_rate": 4.672036169243139e-06, "loss": 1.6327, "step": 5008 }, { "epoch": 1.707225630538514, "grad_norm": 16.422401488061585, "learning_rate": 4.670057387390614e-06, "loss": 1.6663, "step": 5009 }, { "epoch": 1.7075664621676891, "grad_norm": 19.40166610647862, "learning_rate": 4.668078657439374e-06, "loss": 2.9595, "step": 5010 }, { "epoch": 1.7079072937968642, "grad_norm": 12.997921374983479, "learning_rate": 4.666099979700684e-06, "loss": 1.9456, "step": 5011 }, { "epoch": 1.7082481254260395, "grad_norm": 21.721912000367276, "learning_rate": 4.664121354485796e-06, "loss": 2.3154, "step": 5012 }, { "epoch": 1.7085889570552149, "grad_norm": 19.584310106391953, "learning_rate": 4.662142782105955e-06, "loss": 2.4619, "step": 5013 }, { "epoch": 1.70892978868439, "grad_norm": 19.742934072927405, "learning_rate": 4.660164262872402e-06, "loss": 2.3353, "step": 5014 }, { "epoch": 1.709270620313565, "grad_norm": 19.88429579683072, "learning_rate": 4.658185797096362e-06, "loss": 2.8665, "step": 5015 }, { "epoch": 1.7096114519427403, "grad_norm": 20.84082344588187, "learning_rate": 4.656207385089054e-06, "loss": 2.3705, "step": 5016 }, { "epoch": 1.7099522835719154, "grad_norm": 16.091225729103108, "learning_rate": 4.654229027161696e-06, "loss": 2.118, "step": 5017 }, { "epoch": 1.7102931152010905, "grad_norm": 12.272798253103634, "learning_rate": 4.6522507236254885e-06, "loss": 2.1762, "step": 5018 }, { "epoch": 1.7106339468302658, "grad_norm": 13.0740553228587, "learning_rate": 4.6502724747916235e-06, "loss": 2.3089, "step": 5019 }, { "epoch": 1.7109747784594411, "grad_norm": 20.516167750461612, "learning_rate": 4.648294280971294e-06, "loss": 2.0399, "step": 5020 }, { "epoch": 1.7113156100886162, "grad_norm": 18.057423938125282, "learning_rate": 4.646316142475673e-06, "loss": 2.3713, "step": 5021 }, { "epoch": 1.7116564417177913, "grad_norm": 11.453226918672748, "learning_rate": 4.644338059615931e-06, "loss": 2.1004, "step": 5022 }, { "epoch": 1.7119972733469666, "grad_norm": 14.106917564161913, "learning_rate": 4.642360032703226e-06, "loss": 1.9267, "step": 5023 }, { "epoch": 1.712338104976142, "grad_norm": 13.490179943198866, "learning_rate": 4.640382062048713e-06, "loss": 1.7777, "step": 5024 }, { "epoch": 1.712678936605317, "grad_norm": 15.513063791719384, "learning_rate": 4.638404147963533e-06, "loss": 2.8815, "step": 5025 }, { "epoch": 1.713019768234492, "grad_norm": 17.657979011829852, "learning_rate": 4.636426290758819e-06, "loss": 2.1548, "step": 5026 }, { "epoch": 1.7133605998636674, "grad_norm": 15.514592007902005, "learning_rate": 4.634448490745699e-06, "loss": 1.9357, "step": 5027 }, { "epoch": 1.7137014314928425, "grad_norm": 37.90727319544818, "learning_rate": 4.632470748235286e-06, "loss": 2.0881, "step": 5028 }, { "epoch": 1.7140422631220176, "grad_norm": 12.488995256178825, "learning_rate": 4.630493063538685e-06, "loss": 1.5465, "step": 5029 }, { "epoch": 1.7143830947511929, "grad_norm": 13.113242810998464, "learning_rate": 4.6285154369669986e-06, "loss": 2.2363, "step": 5030 }, { "epoch": 1.7147239263803682, "grad_norm": 20.892187126514376, "learning_rate": 4.626537868831313e-06, "loss": 2.6393, "step": 5031 }, { "epoch": 1.7150647580095433, "grad_norm": 17.955107157160644, "learning_rate": 4.624560359442706e-06, "loss": 2.3694, "step": 5032 }, { "epoch": 1.7154055896387184, "grad_norm": 31.69603686618882, "learning_rate": 4.622582909112248e-06, "loss": 1.6855, "step": 5033 }, { "epoch": 1.7157464212678937, "grad_norm": 20.92571105598751, "learning_rate": 4.620605518151003e-06, "loss": 2.4929, "step": 5034 }, { "epoch": 1.716087252897069, "grad_norm": 22.37325239597225, "learning_rate": 4.6186281868700186e-06, "loss": 1.8494, "step": 5035 }, { "epoch": 1.716428084526244, "grad_norm": 17.78941433253117, "learning_rate": 4.6166509155803405e-06, "loss": 2.7589, "step": 5036 }, { "epoch": 1.7167689161554192, "grad_norm": 15.806373273595014, "learning_rate": 4.614673704592997e-06, "loss": 2.3025, "step": 5037 }, { "epoch": 1.7171097477845945, "grad_norm": 22.65381248418739, "learning_rate": 4.612696554219014e-06, "loss": 2.4517, "step": 5038 }, { "epoch": 1.7174505794137696, "grad_norm": 18.069697979530222, "learning_rate": 4.610719464769407e-06, "loss": 2.1864, "step": 5039 }, { "epoch": 1.7177914110429446, "grad_norm": 14.413616985708764, "learning_rate": 4.6087424365551765e-06, "loss": 2.3897, "step": 5040 }, { "epoch": 1.71813224267212, "grad_norm": 12.540587597005697, "learning_rate": 4.6067654698873175e-06, "loss": 2.3287, "step": 5041 }, { "epoch": 1.7184730743012953, "grad_norm": 18.424506304556694, "learning_rate": 4.604788565076817e-06, "loss": 2.4246, "step": 5042 }, { "epoch": 1.7188139059304703, "grad_norm": 20.343867475132917, "learning_rate": 4.602811722434648e-06, "loss": 2.8961, "step": 5043 }, { "epoch": 1.7191547375596454, "grad_norm": 19.633169297161047, "learning_rate": 4.600834942271775e-06, "loss": 1.9568, "step": 5044 }, { "epoch": 1.7194955691888207, "grad_norm": 17.669711128594532, "learning_rate": 4.598858224899156e-06, "loss": 1.9764, "step": 5045 }, { "epoch": 1.719836400817996, "grad_norm": 19.882448214897664, "learning_rate": 4.596881570627734e-06, "loss": 1.6391, "step": 5046 }, { "epoch": 1.7201772324471711, "grad_norm": 15.135827143042716, "learning_rate": 4.594904979768445e-06, "loss": 2.1068, "step": 5047 }, { "epoch": 1.7205180640763462, "grad_norm": 30.83329562692499, "learning_rate": 4.592928452632217e-06, "loss": 2.8664, "step": 5048 }, { "epoch": 1.7208588957055215, "grad_norm": 13.926476902004083, "learning_rate": 4.590951989529963e-06, "loss": 2.0289, "step": 5049 }, { "epoch": 1.7211997273346966, "grad_norm": 17.219471378700067, "learning_rate": 4.58897559077259e-06, "loss": 2.6694, "step": 5050 }, { "epoch": 1.7215405589638717, "grad_norm": 18.041916460440913, "learning_rate": 4.586999256670992e-06, "loss": 2.3146, "step": 5051 }, { "epoch": 1.721881390593047, "grad_norm": 13.742144857733097, "learning_rate": 4.5850229875360564e-06, "loss": 2.5892, "step": 5052 }, { "epoch": 1.7222222222222223, "grad_norm": 20.579810856198975, "learning_rate": 4.583046783678655e-06, "loss": 2.1115, "step": 5053 }, { "epoch": 1.7225630538513974, "grad_norm": 19.563694425929132, "learning_rate": 4.581070645409654e-06, "loss": 2.4788, "step": 5054 }, { "epoch": 1.7229038854805725, "grad_norm": 15.276617602723613, "learning_rate": 4.579094573039911e-06, "loss": 2.7262, "step": 5055 }, { "epoch": 1.7232447171097478, "grad_norm": 11.497066579589347, "learning_rate": 4.577118566880266e-06, "loss": 2.1753, "step": 5056 }, { "epoch": 1.7235855487389231, "grad_norm": 18.98928769432887, "learning_rate": 4.575142627241552e-06, "loss": 1.7823, "step": 5057 }, { "epoch": 1.7239263803680982, "grad_norm": 19.16138287217517, "learning_rate": 4.573166754434595e-06, "loss": 2.3013, "step": 5058 }, { "epoch": 1.7242672119972733, "grad_norm": 14.437691159203107, "learning_rate": 4.571190948770208e-06, "loss": 1.9086, "step": 5059 }, { "epoch": 1.7246080436264486, "grad_norm": 15.124520467508404, "learning_rate": 4.569215210559189e-06, "loss": 2.6679, "step": 5060 }, { "epoch": 1.7249488752556237, "grad_norm": 15.383353424039155, "learning_rate": 4.567239540112335e-06, "loss": 1.9739, "step": 5061 }, { "epoch": 1.7252897068847988, "grad_norm": 13.037316172348396, "learning_rate": 4.565263937740423e-06, "loss": 2.4938, "step": 5062 }, { "epoch": 1.725630538513974, "grad_norm": 16.78592772262329, "learning_rate": 4.563288403754222e-06, "loss": 2.2603, "step": 5063 }, { "epoch": 1.7259713701431494, "grad_norm": 15.480306618240919, "learning_rate": 4.561312938464498e-06, "loss": 3.0272, "step": 5064 }, { "epoch": 1.7263122017723245, "grad_norm": 20.7587876265301, "learning_rate": 4.559337542181993e-06, "loss": 2.4091, "step": 5065 }, { "epoch": 1.7266530334014996, "grad_norm": 14.343024602029418, "learning_rate": 4.557362215217445e-06, "loss": 2.3403, "step": 5066 }, { "epoch": 1.7269938650306749, "grad_norm": 20.968824031383352, "learning_rate": 4.555386957881584e-06, "loss": 2.0663, "step": 5067 }, { "epoch": 1.7273346966598502, "grad_norm": 10.862283418542374, "learning_rate": 4.553411770485126e-06, "loss": 2.0419, "step": 5068 }, { "epoch": 1.7276755282890253, "grad_norm": 27.69303715825103, "learning_rate": 4.551436653338771e-06, "loss": 2.069, "step": 5069 }, { "epoch": 1.7280163599182004, "grad_norm": 19.847169454817625, "learning_rate": 4.549461606753219e-06, "loss": 2.5615, "step": 5070 }, { "epoch": 1.7283571915473757, "grad_norm": 19.052175638891185, "learning_rate": 4.5474866310391486e-06, "loss": 2.3054, "step": 5071 }, { "epoch": 1.7286980231765507, "grad_norm": 15.497954106404382, "learning_rate": 4.545511726507232e-06, "loss": 2.1702, "step": 5072 }, { "epoch": 1.7290388548057258, "grad_norm": 13.697898763224496, "learning_rate": 4.543536893468131e-06, "loss": 2.1937, "step": 5073 }, { "epoch": 1.7293796864349011, "grad_norm": 13.6275565220429, "learning_rate": 4.541562132232494e-06, "loss": 2.1822, "step": 5074 }, { "epoch": 1.7297205180640765, "grad_norm": 21.36465248286139, "learning_rate": 4.539587443110956e-06, "loss": 2.3368, "step": 5075 }, { "epoch": 1.7300613496932515, "grad_norm": 21.821921037291453, "learning_rate": 4.537612826414148e-06, "loss": 2.0527, "step": 5076 }, { "epoch": 1.7304021813224266, "grad_norm": 20.809950401087153, "learning_rate": 4.535638282452684e-06, "loss": 2.0027, "step": 5077 }, { "epoch": 1.730743012951602, "grad_norm": 12.477891139395208, "learning_rate": 4.533663811537164e-06, "loss": 2.2599, "step": 5078 }, { "epoch": 1.7310838445807772, "grad_norm": 21.696442768999127, "learning_rate": 4.531689413978184e-06, "loss": 2.5802, "step": 5079 }, { "epoch": 1.7314246762099523, "grad_norm": 14.04987685028848, "learning_rate": 4.529715090086325e-06, "loss": 2.4352, "step": 5080 }, { "epoch": 1.7317655078391274, "grad_norm": 14.948690478854681, "learning_rate": 4.5277408401721525e-06, "loss": 2.3183, "step": 5081 }, { "epoch": 1.7321063394683027, "grad_norm": 14.555405661810765, "learning_rate": 4.525766664546227e-06, "loss": 1.766, "step": 5082 }, { "epoch": 1.7324471710974778, "grad_norm": 14.941222976219215, "learning_rate": 4.523792563519092e-06, "loss": 2.1094, "step": 5083 }, { "epoch": 1.732788002726653, "grad_norm": 22.63886615610604, "learning_rate": 4.521818537401282e-06, "loss": 2.2758, "step": 5084 }, { "epoch": 1.7331288343558282, "grad_norm": 14.699611729937146, "learning_rate": 4.5198445865033214e-06, "loss": 2.3905, "step": 5085 }, { "epoch": 1.7334696659850035, "grad_norm": 11.468955696632042, "learning_rate": 4.517870711135719e-06, "loss": 1.6515, "step": 5086 }, { "epoch": 1.7338104976141786, "grad_norm": 18.347654772365313, "learning_rate": 4.5158969116089715e-06, "loss": 2.4654, "step": 5087 }, { "epoch": 1.7341513292433537, "grad_norm": 18.069941495307585, "learning_rate": 4.513923188233566e-06, "loss": 2.2095, "step": 5088 }, { "epoch": 1.734492160872529, "grad_norm": 15.530963778042679, "learning_rate": 4.51194954131998e-06, "loss": 2.5167, "step": 5089 }, { "epoch": 1.7348329925017043, "grad_norm": 15.063219057545643, "learning_rate": 4.509975971178674e-06, "loss": 2.3769, "step": 5090 }, { "epoch": 1.7351738241308794, "grad_norm": 14.095217808063346, "learning_rate": 4.508002478120096e-06, "loss": 2.1875, "step": 5091 }, { "epoch": 1.7355146557600545, "grad_norm": 14.976659562729596, "learning_rate": 4.5060290624546885e-06, "loss": 2.1296, "step": 5092 }, { "epoch": 1.7358554873892298, "grad_norm": 15.333036972093163, "learning_rate": 4.504055724492877e-06, "loss": 1.9961, "step": 5093 }, { "epoch": 1.7361963190184049, "grad_norm": 18.0779770594952, "learning_rate": 4.502082464545072e-06, "loss": 1.8732, "step": 5094 }, { "epoch": 1.73653715064758, "grad_norm": 13.860268528008062, "learning_rate": 4.5001092829216785e-06, "loss": 2.2192, "step": 5095 }, { "epoch": 1.7368779822767553, "grad_norm": 19.413565426030022, "learning_rate": 4.498136179933085e-06, "loss": 3.109, "step": 5096 }, { "epoch": 1.7372188139059306, "grad_norm": 25.029378533432237, "learning_rate": 4.496163155889666e-06, "loss": 2.2057, "step": 5097 }, { "epoch": 1.7375596455351057, "grad_norm": 16.178090153596322, "learning_rate": 4.494190211101791e-06, "loss": 2.1624, "step": 5098 }, { "epoch": 1.7379004771642808, "grad_norm": 15.956750976835927, "learning_rate": 4.492217345879809e-06, "loss": 2.1793, "step": 5099 }, { "epoch": 1.738241308793456, "grad_norm": 13.002218738924466, "learning_rate": 4.490244560534058e-06, "loss": 2.2065, "step": 5100 }, { "epoch": 1.7385821404226314, "grad_norm": 22.17664732490765, "learning_rate": 4.488271855374868e-06, "loss": 1.6305, "step": 5101 }, { "epoch": 1.7389229720518065, "grad_norm": 15.384219379235127, "learning_rate": 4.4862992307125534e-06, "loss": 2.2589, "step": 5102 }, { "epoch": 1.7392638036809815, "grad_norm": 21.911975365077126, "learning_rate": 4.4843266868574125e-06, "loss": 2.0298, "step": 5103 }, { "epoch": 1.7396046353101569, "grad_norm": 14.482871388150002, "learning_rate": 4.482354224119739e-06, "loss": 2.3263, "step": 5104 }, { "epoch": 1.739945466939332, "grad_norm": 19.365151658231678, "learning_rate": 4.4803818428098045e-06, "loss": 1.9553, "step": 5105 }, { "epoch": 1.740286298568507, "grad_norm": 21.871090986840194, "learning_rate": 4.4784095432378746e-06, "loss": 2.3565, "step": 5106 }, { "epoch": 1.7406271301976823, "grad_norm": 19.783678384377726, "learning_rate": 4.476437325714201e-06, "loss": 2.4907, "step": 5107 }, { "epoch": 1.7409679618268576, "grad_norm": 22.93889834467417, "learning_rate": 4.47446519054902e-06, "loss": 2.9903, "step": 5108 }, { "epoch": 1.7413087934560327, "grad_norm": 18.32732181453398, "learning_rate": 4.472493138052557e-06, "loss": 2.6042, "step": 5109 }, { "epoch": 1.7416496250852078, "grad_norm": 17.59660705119435, "learning_rate": 4.470521168535022e-06, "loss": 2.5598, "step": 5110 }, { "epoch": 1.7419904567143831, "grad_norm": 15.021579305173448, "learning_rate": 4.468549282306617e-06, "loss": 1.9924, "step": 5111 }, { "epoch": 1.7423312883435584, "grad_norm": 22.68269095899723, "learning_rate": 4.466577479677523e-06, "loss": 1.8932, "step": 5112 }, { "epoch": 1.7426721199727335, "grad_norm": 18.030860016040503, "learning_rate": 4.464605760957916e-06, "loss": 2.2986, "step": 5113 }, { "epoch": 1.7430129516019086, "grad_norm": 20.25867945223272, "learning_rate": 4.462634126457955e-06, "loss": 2.0511, "step": 5114 }, { "epoch": 1.743353783231084, "grad_norm": 18.87870835845495, "learning_rate": 4.460662576487783e-06, "loss": 2.4401, "step": 5115 }, { "epoch": 1.743694614860259, "grad_norm": 27.6443661365189, "learning_rate": 4.458691111357537e-06, "loss": 2.1463, "step": 5116 }, { "epoch": 1.744035446489434, "grad_norm": 18.73090214809991, "learning_rate": 4.456719731377332e-06, "loss": 1.9245, "step": 5117 }, { "epoch": 1.7443762781186094, "grad_norm": 13.519953612673026, "learning_rate": 4.454748436857276e-06, "loss": 2.3429, "step": 5118 }, { "epoch": 1.7447171097477847, "grad_norm": 15.365371421567367, "learning_rate": 4.452777228107463e-06, "loss": 2.056, "step": 5119 }, { "epoch": 1.7450579413769598, "grad_norm": 12.586322061690149, "learning_rate": 4.4508061054379705e-06, "loss": 2.3949, "step": 5120 }, { "epoch": 1.7453987730061349, "grad_norm": 19.40577934288575, "learning_rate": 4.4488350691588624e-06, "loss": 2.0839, "step": 5121 }, { "epoch": 1.7457396046353102, "grad_norm": 15.795101016865903, "learning_rate": 4.446864119580192e-06, "loss": 2.3132, "step": 5122 }, { "epoch": 1.7460804362644855, "grad_norm": 17.200528177669543, "learning_rate": 4.444893257011998e-06, "loss": 2.3768, "step": 5123 }, { "epoch": 1.7464212678936604, "grad_norm": 20.947912105664297, "learning_rate": 4.442922481764306e-06, "loss": 2.5519, "step": 5124 }, { "epoch": 1.7467620995228357, "grad_norm": 14.015479239236948, "learning_rate": 4.440951794147122e-06, "loss": 2.5172, "step": 5125 }, { "epoch": 1.747102931152011, "grad_norm": 21.549001875029777, "learning_rate": 4.4389811944704476e-06, "loss": 2.3767, "step": 5126 }, { "epoch": 1.747443762781186, "grad_norm": 10.84065752154643, "learning_rate": 4.437010683044266e-06, "loss": 1.9243, "step": 5127 }, { "epoch": 1.7477845944103612, "grad_norm": 17.68132816923046, "learning_rate": 4.4350402601785425e-06, "loss": 2.6642, "step": 5128 }, { "epoch": 1.7481254260395365, "grad_norm": 25.705284468193096, "learning_rate": 4.433069926183238e-06, "loss": 1.8235, "step": 5129 }, { "epoch": 1.7484662576687118, "grad_norm": 15.731930819516988, "learning_rate": 4.431099681368289e-06, "loss": 2.0522, "step": 5130 }, { "epoch": 1.7488070892978869, "grad_norm": 14.2470458239207, "learning_rate": 4.429129526043624e-06, "loss": 2.63, "step": 5131 }, { "epoch": 1.749147920927062, "grad_norm": 17.176678588314427, "learning_rate": 4.427159460519159e-06, "loss": 2.0559, "step": 5132 }, { "epoch": 1.7494887525562373, "grad_norm": 20.2394025632041, "learning_rate": 4.425189485104792e-06, "loss": 2.4002, "step": 5133 }, { "epoch": 1.7498295841854126, "grad_norm": 19.608357194023256, "learning_rate": 4.423219600110404e-06, "loss": 2.3485, "step": 5134 }, { "epoch": 1.7501704158145874, "grad_norm": 17.665367063467652, "learning_rate": 4.42124980584587e-06, "loss": 2.0905, "step": 5135 }, { "epoch": 1.7505112474437627, "grad_norm": 21.075550580754435, "learning_rate": 4.419280102621047e-06, "loss": 2.2592, "step": 5136 }, { "epoch": 1.750852079072938, "grad_norm": 25.87264319750964, "learning_rate": 4.417310490745773e-06, "loss": 2.3822, "step": 5137 }, { "epoch": 1.7511929107021131, "grad_norm": 18.82225129742288, "learning_rate": 4.41534097052988e-06, "loss": 1.6717, "step": 5138 }, { "epoch": 1.7515337423312882, "grad_norm": 19.77707497225093, "learning_rate": 4.413371542283181e-06, "loss": 2.0733, "step": 5139 }, { "epoch": 1.7518745739604635, "grad_norm": 12.374411045699889, "learning_rate": 4.411402206315471e-06, "loss": 2.1299, "step": 5140 }, { "epoch": 1.7522154055896388, "grad_norm": 15.75324790787278, "learning_rate": 4.409432962936539e-06, "loss": 2.2574, "step": 5141 }, { "epoch": 1.752556237218814, "grad_norm": 16.799892575003046, "learning_rate": 4.407463812456152e-06, "loss": 2.0906, "step": 5142 }, { "epoch": 1.752897068847989, "grad_norm": 14.962223270682488, "learning_rate": 4.405494755184065e-06, "loss": 1.9705, "step": 5143 }, { "epoch": 1.7532379004771643, "grad_norm": 18.741642457184337, "learning_rate": 4.403525791430022e-06, "loss": 2.668, "step": 5144 }, { "epoch": 1.7535787321063396, "grad_norm": 17.28072189599227, "learning_rate": 4.401556921503746e-06, "loss": 2.0305, "step": 5145 }, { "epoch": 1.7539195637355145, "grad_norm": 18.818454366900504, "learning_rate": 4.3995881457149465e-06, "loss": 2.4462, "step": 5146 }, { "epoch": 1.7542603953646898, "grad_norm": 24.646895648681475, "learning_rate": 4.3976194643733225e-06, "loss": 1.8048, "step": 5147 }, { "epoch": 1.7546012269938651, "grad_norm": 14.65979795628185, "learning_rate": 4.395650877788557e-06, "loss": 2.2892, "step": 5148 }, { "epoch": 1.7549420586230402, "grad_norm": 15.956902990292656, "learning_rate": 4.3936823862703105e-06, "loss": 2.2802, "step": 5149 }, { "epoch": 1.7552828902522153, "grad_norm": 15.42358398509083, "learning_rate": 4.391713990128242e-06, "loss": 2.5677, "step": 5150 }, { "epoch": 1.7556237218813906, "grad_norm": 14.04965134738658, "learning_rate": 4.3897456896719814e-06, "loss": 2.1454, "step": 5151 }, { "epoch": 1.755964553510566, "grad_norm": 16.070807558357625, "learning_rate": 4.387777485211155e-06, "loss": 1.787, "step": 5152 }, { "epoch": 1.756305385139741, "grad_norm": 20.54092092025563, "learning_rate": 4.3858093770553655e-06, "loss": 2.2455, "step": 5153 }, { "epoch": 1.756646216768916, "grad_norm": 20.200531060464293, "learning_rate": 4.383841365514208e-06, "loss": 2.3754, "step": 5154 }, { "epoch": 1.7569870483980914, "grad_norm": 20.536712314587447, "learning_rate": 4.381873450897255e-06, "loss": 1.8847, "step": 5155 }, { "epoch": 1.7573278800272665, "grad_norm": 18.732254873095805, "learning_rate": 4.3799056335140675e-06, "loss": 2.3076, "step": 5156 }, { "epoch": 1.7576687116564416, "grad_norm": 34.42809005476786, "learning_rate": 4.377937913674195e-06, "loss": 2.4725, "step": 5157 }, { "epoch": 1.7580095432856169, "grad_norm": 26.94952743440283, "learning_rate": 4.375970291687165e-06, "loss": 2.5146, "step": 5158 }, { "epoch": 1.7583503749147922, "grad_norm": 15.891943882405467, "learning_rate": 4.37400276786249e-06, "loss": 2.1845, "step": 5159 }, { "epoch": 1.7586912065439673, "grad_norm": 20.7867133943064, "learning_rate": 4.372035342509673e-06, "loss": 2.6423, "step": 5160 }, { "epoch": 1.7590320381731424, "grad_norm": 19.774075200502747, "learning_rate": 4.370068015938197e-06, "loss": 2.3917, "step": 5161 }, { "epoch": 1.7593728698023177, "grad_norm": 16.178699468410787, "learning_rate": 4.368100788457527e-06, "loss": 1.9482, "step": 5162 }, { "epoch": 1.759713701431493, "grad_norm": 22.762251903076532, "learning_rate": 4.366133660377121e-06, "loss": 2.5826, "step": 5163 }, { "epoch": 1.760054533060668, "grad_norm": 30.49888896936627, "learning_rate": 4.364166632006413e-06, "loss": 2.5322, "step": 5164 }, { "epoch": 1.7603953646898431, "grad_norm": 12.121110306422834, "learning_rate": 4.362199703654823e-06, "loss": 1.6104, "step": 5165 }, { "epoch": 1.7607361963190185, "grad_norm": 14.22227022275941, "learning_rate": 4.360232875631762e-06, "loss": 2.139, "step": 5166 }, { "epoch": 1.7610770279481935, "grad_norm": 14.654260244447787, "learning_rate": 4.358266148246614e-06, "loss": 2.1818, "step": 5167 }, { "epoch": 1.7614178595773686, "grad_norm": 11.98963325863662, "learning_rate": 4.356299521808756e-06, "loss": 2.2926, "step": 5168 }, { "epoch": 1.761758691206544, "grad_norm": 13.372039496101104, "learning_rate": 4.354332996627546e-06, "loss": 2.13, "step": 5169 }, { "epoch": 1.7620995228357192, "grad_norm": 14.627291320675825, "learning_rate": 4.352366573012327e-06, "loss": 2.2748, "step": 5170 }, { "epoch": 1.7624403544648943, "grad_norm": 17.261031419481316, "learning_rate": 4.350400251272421e-06, "loss": 2.3547, "step": 5171 }, { "epoch": 1.7627811860940694, "grad_norm": 14.50724129027168, "learning_rate": 4.348434031717143e-06, "loss": 1.9419, "step": 5172 }, { "epoch": 1.7631220177232447, "grad_norm": 15.601626137015229, "learning_rate": 4.3464679146557866e-06, "loss": 2.2423, "step": 5173 }, { "epoch": 1.76346284935242, "grad_norm": 19.136693101107426, "learning_rate": 4.344501900397626e-06, "loss": 2.3318, "step": 5174 }, { "epoch": 1.7638036809815951, "grad_norm": 18.15853545810415, "learning_rate": 4.342535989251929e-06, "loss": 2.3646, "step": 5175 }, { "epoch": 1.7641445126107702, "grad_norm": 17.88930999113575, "learning_rate": 4.3405701815279376e-06, "loss": 2.2258, "step": 5176 }, { "epoch": 1.7644853442399455, "grad_norm": 23.941915279109775, "learning_rate": 4.338604477534879e-06, "loss": 2.9619, "step": 5177 }, { "epoch": 1.7648261758691206, "grad_norm": 14.588102806271792, "learning_rate": 4.336638877581971e-06, "loss": 2.4366, "step": 5178 }, { "epoch": 1.7651670074982957, "grad_norm": 19.17988694195289, "learning_rate": 4.3346733819784095e-06, "loss": 2.0189, "step": 5179 }, { "epoch": 1.765507839127471, "grad_norm": 16.020822866945014, "learning_rate": 4.332707991033371e-06, "loss": 1.908, "step": 5180 }, { "epoch": 1.7658486707566463, "grad_norm": 18.170819428848134, "learning_rate": 4.330742705056024e-06, "loss": 2.4809, "step": 5181 }, { "epoch": 1.7661895023858214, "grad_norm": 18.003602036465953, "learning_rate": 4.328777524355513e-06, "loss": 2.3801, "step": 5182 }, { "epoch": 1.7665303340149965, "grad_norm": 17.659237854238395, "learning_rate": 4.326812449240968e-06, "loss": 2.6564, "step": 5183 }, { "epoch": 1.7668711656441718, "grad_norm": 18.822568627253037, "learning_rate": 4.324847480021506e-06, "loss": 2.3391, "step": 5184 }, { "epoch": 1.767211997273347, "grad_norm": 23.90287207941454, "learning_rate": 4.322882617006223e-06, "loss": 2.8046, "step": 5185 }, { "epoch": 1.7675528289025222, "grad_norm": 28.061175878522562, "learning_rate": 4.3209178605042e-06, "loss": 2.169, "step": 5186 }, { "epoch": 1.7678936605316973, "grad_norm": 17.6463445950485, "learning_rate": 4.318953210824499e-06, "loss": 1.682, "step": 5187 }, { "epoch": 1.7682344921608726, "grad_norm": 12.805419312607222, "learning_rate": 4.316988668276171e-06, "loss": 2.1139, "step": 5188 }, { "epoch": 1.7685753237900477, "grad_norm": 17.065697198312883, "learning_rate": 4.315024233168243e-06, "loss": 2.0432, "step": 5189 }, { "epoch": 1.7689161554192228, "grad_norm": 21.02337818801944, "learning_rate": 4.31305990580973e-06, "loss": 2.7886, "step": 5190 }, { "epoch": 1.769256987048398, "grad_norm": 16.347835776484548, "learning_rate": 4.31109568650963e-06, "loss": 2.1585, "step": 5191 }, { "epoch": 1.7695978186775734, "grad_norm": 13.483451657497273, "learning_rate": 4.30913157557692e-06, "loss": 1.878, "step": 5192 }, { "epoch": 1.7699386503067485, "grad_norm": 13.950003862031465, "learning_rate": 4.307167573320562e-06, "loss": 1.7488, "step": 5193 }, { "epoch": 1.7702794819359235, "grad_norm": 18.089747462320023, "learning_rate": 4.305203680049504e-06, "loss": 2.7858, "step": 5194 }, { "epoch": 1.7706203135650989, "grad_norm": 22.057050352083678, "learning_rate": 4.303239896072673e-06, "loss": 2.3185, "step": 5195 }, { "epoch": 1.7709611451942742, "grad_norm": 16.60242507065884, "learning_rate": 4.301276221698979e-06, "loss": 2.5922, "step": 5196 }, { "epoch": 1.7713019768234493, "grad_norm": 13.878289940339824, "learning_rate": 4.299312657237318e-06, "loss": 1.9047, "step": 5197 }, { "epoch": 1.7716428084526243, "grad_norm": 128.67943336847222, "learning_rate": 4.2973492029965655e-06, "loss": 2.7212, "step": 5198 }, { "epoch": 1.7719836400817996, "grad_norm": 17.19074704925077, "learning_rate": 4.295385859285579e-06, "loss": 2.259, "step": 5199 }, { "epoch": 1.7723244717109747, "grad_norm": 16.509461245188916, "learning_rate": 4.293422626413203e-06, "loss": 1.6283, "step": 5200 }, { "epoch": 1.7726653033401498, "grad_norm": 12.574671908019113, "learning_rate": 4.29145950468826e-06, "loss": 2.2783, "step": 5201 }, { "epoch": 1.7730061349693251, "grad_norm": 18.039759327999626, "learning_rate": 4.289496494419556e-06, "loss": 2.091, "step": 5202 }, { "epoch": 1.7733469665985004, "grad_norm": 18.363845282649415, "learning_rate": 4.287533595915884e-06, "loss": 2.1459, "step": 5203 }, { "epoch": 1.7736877982276755, "grad_norm": 11.463006065984715, "learning_rate": 4.285570809486014e-06, "loss": 2.1378, "step": 5204 }, { "epoch": 1.7740286298568506, "grad_norm": 12.864385778617457, "learning_rate": 4.283608135438697e-06, "loss": 1.9613, "step": 5205 }, { "epoch": 1.774369461486026, "grad_norm": 14.474677316114839, "learning_rate": 4.281645574082674e-06, "loss": 2.5954, "step": 5206 }, { "epoch": 1.7747102931152012, "grad_norm": 17.219788599854596, "learning_rate": 4.279683125726662e-06, "loss": 2.2496, "step": 5207 }, { "epoch": 1.7750511247443763, "grad_norm": 15.171308871848867, "learning_rate": 4.27772079067936e-06, "loss": 2.422, "step": 5208 }, { "epoch": 1.7753919563735514, "grad_norm": 16.23403309549262, "learning_rate": 4.275758569249455e-06, "loss": 2.1934, "step": 5209 }, { "epoch": 1.7757327880027267, "grad_norm": 16.101807896476554, "learning_rate": 4.273796461745609e-06, "loss": 2.3911, "step": 5210 }, { "epoch": 1.7760736196319018, "grad_norm": 12.643945839126618, "learning_rate": 4.271834468476469e-06, "loss": 2.2212, "step": 5211 }, { "epoch": 1.7764144512610769, "grad_norm": 31.193360884767273, "learning_rate": 4.2698725897506685e-06, "loss": 2.3907, "step": 5212 }, { "epoch": 1.7767552828902522, "grad_norm": 19.402399563064424, "learning_rate": 4.267910825876816e-06, "loss": 2.265, "step": 5213 }, { "epoch": 1.7770961145194275, "grad_norm": 20.210168352226738, "learning_rate": 4.265949177163503e-06, "loss": 2.1921, "step": 5214 }, { "epoch": 1.7774369461486026, "grad_norm": 19.008321367727664, "learning_rate": 4.263987643919306e-06, "loss": 2.3328, "step": 5215 }, { "epoch": 1.7777777777777777, "grad_norm": 16.39675747925913, "learning_rate": 4.262026226452785e-06, "loss": 3.1314, "step": 5216 }, { "epoch": 1.778118609406953, "grad_norm": 13.99975841091573, "learning_rate": 4.260064925072476e-06, "loss": 2.5995, "step": 5217 }, { "epoch": 1.7784594410361283, "grad_norm": 19.816965145059672, "learning_rate": 4.258103740086898e-06, "loss": 1.8668, "step": 5218 }, { "epoch": 1.7788002726653034, "grad_norm": 14.315495182584632, "learning_rate": 4.256142671804556e-06, "loss": 1.9889, "step": 5219 }, { "epoch": 1.7791411042944785, "grad_norm": 15.669368037806887, "learning_rate": 4.254181720533933e-06, "loss": 2.2456, "step": 5220 }, { "epoch": 1.7794819359236538, "grad_norm": 18.290257947410613, "learning_rate": 4.252220886583493e-06, "loss": 2.5872, "step": 5221 }, { "epoch": 1.7798227675528289, "grad_norm": 23.1361964322411, "learning_rate": 4.2502601702616865e-06, "loss": 2.6008, "step": 5222 }, { "epoch": 1.780163599182004, "grad_norm": 25.585345837680407, "learning_rate": 4.248299571876938e-06, "loss": 1.9261, "step": 5223 }, { "epoch": 1.7805044308111793, "grad_norm": 22.67873830928119, "learning_rate": 4.246339091737658e-06, "loss": 2.6562, "step": 5224 }, { "epoch": 1.7808452624403546, "grad_norm": 16.163437960381685, "learning_rate": 4.244378730152242e-06, "loss": 1.683, "step": 5225 }, { "epoch": 1.7811860940695297, "grad_norm": 19.42186530338165, "learning_rate": 4.242418487429059e-06, "loss": 2.3917, "step": 5226 }, { "epoch": 1.7815269256987047, "grad_norm": 21.18168637620133, "learning_rate": 4.240458363876462e-06, "loss": 2.1382, "step": 5227 }, { "epoch": 1.78186775732788, "grad_norm": 15.746780198875369, "learning_rate": 4.23849835980279e-06, "loss": 2.3668, "step": 5228 }, { "epoch": 1.7822085889570554, "grad_norm": 21.914052364045357, "learning_rate": 4.236538475516357e-06, "loss": 2.6084, "step": 5229 }, { "epoch": 1.7825494205862304, "grad_norm": 13.966215805091714, "learning_rate": 4.2345787113254585e-06, "loss": 2.2779, "step": 5230 }, { "epoch": 1.7828902522154055, "grad_norm": 16.99400624637014, "learning_rate": 4.232619067538378e-06, "loss": 1.3921, "step": 5231 }, { "epoch": 1.7832310838445808, "grad_norm": 15.714590273838313, "learning_rate": 4.230659544463373e-06, "loss": 2.3525, "step": 5232 }, { "epoch": 1.783571915473756, "grad_norm": 20.999984552234825, "learning_rate": 4.228700142408683e-06, "loss": 2.0405, "step": 5233 }, { "epoch": 1.783912747102931, "grad_norm": 12.45463133752388, "learning_rate": 4.226740861682532e-06, "loss": 2.2719, "step": 5234 }, { "epoch": 1.7842535787321063, "grad_norm": 10.66365954237188, "learning_rate": 4.224781702593122e-06, "loss": 2.0158, "step": 5235 }, { "epoch": 1.7845944103612816, "grad_norm": 15.753772372338394, "learning_rate": 4.2228226654486335e-06, "loss": 2.2535, "step": 5236 }, { "epoch": 1.7849352419904567, "grad_norm": 12.915126000968876, "learning_rate": 4.220863750557237e-06, "loss": 2.123, "step": 5237 }, { "epoch": 1.7852760736196318, "grad_norm": 18.918818576709995, "learning_rate": 4.218904958227073e-06, "loss": 2.3984, "step": 5238 }, { "epoch": 1.7856169052488071, "grad_norm": 14.686613998264935, "learning_rate": 4.216946288766267e-06, "loss": 2.3304, "step": 5239 }, { "epoch": 1.7859577368779824, "grad_norm": 15.110052167738678, "learning_rate": 4.2149877424829275e-06, "loss": 2.2871, "step": 5240 }, { "epoch": 1.7862985685071575, "grad_norm": 18.221431098019252, "learning_rate": 4.213029319685142e-06, "loss": 2.3302, "step": 5241 }, { "epoch": 1.7866394001363326, "grad_norm": 16.80376866017313, "learning_rate": 4.211071020680976e-06, "loss": 2.5375, "step": 5242 }, { "epoch": 1.786980231765508, "grad_norm": 14.35877419240093, "learning_rate": 4.209112845778481e-06, "loss": 1.6696, "step": 5243 }, { "epoch": 1.787321063394683, "grad_norm": 16.096402117727695, "learning_rate": 4.207154795285682e-06, "loss": 2.5536, "step": 5244 }, { "epoch": 1.787661895023858, "grad_norm": 12.868784718432256, "learning_rate": 4.205196869510589e-06, "loss": 1.8969, "step": 5245 }, { "epoch": 1.7880027266530334, "grad_norm": 21.893972626025967, "learning_rate": 4.203239068761194e-06, "loss": 2.5371, "step": 5246 }, { "epoch": 1.7883435582822087, "grad_norm": 13.732050162989005, "learning_rate": 4.201281393345466e-06, "loss": 1.6791, "step": 5247 }, { "epoch": 1.7886843899113838, "grad_norm": 19.679665231345332, "learning_rate": 4.1993238435713525e-06, "loss": 2.1028, "step": 5248 }, { "epoch": 1.7890252215405589, "grad_norm": 15.032523555979186, "learning_rate": 4.197366419746786e-06, "loss": 2.2215, "step": 5249 }, { "epoch": 1.7893660531697342, "grad_norm": 14.712556490839296, "learning_rate": 4.195409122179678e-06, "loss": 2.1248, "step": 5250 }, { "epoch": 1.7897068847989095, "grad_norm": 15.255715711466312, "learning_rate": 4.193451951177917e-06, "loss": 2.2773, "step": 5251 }, { "epoch": 1.7900477164280846, "grad_norm": 17.34243385537868, "learning_rate": 4.191494907049375e-06, "loss": 1.967, "step": 5252 }, { "epoch": 1.7903885480572597, "grad_norm": 16.371860721254848, "learning_rate": 4.1895379901019025e-06, "loss": 2.0244, "step": 5253 }, { "epoch": 1.790729379686435, "grad_norm": 16.281854762652998, "learning_rate": 4.187581200643331e-06, "loss": 2.3322, "step": 5254 }, { "epoch": 1.79107021131561, "grad_norm": 15.798591614550407, "learning_rate": 4.185624538981469e-06, "loss": 2.4108, "step": 5255 }, { "epoch": 1.7914110429447851, "grad_norm": 16.861541017788415, "learning_rate": 4.1836680054241096e-06, "loss": 2.6917, "step": 5256 }, { "epoch": 1.7917518745739605, "grad_norm": 11.896934024503803, "learning_rate": 4.1817116002790235e-06, "loss": 1.8779, "step": 5257 }, { "epoch": 1.7920927062031358, "grad_norm": 21.159392504399207, "learning_rate": 4.179755323853959e-06, "loss": 2.9045, "step": 5258 }, { "epoch": 1.7924335378323109, "grad_norm": 14.35937225970893, "learning_rate": 4.177799176456649e-06, "loss": 2.3445, "step": 5259 }, { "epoch": 1.792774369461486, "grad_norm": 19.861287432225694, "learning_rate": 4.1758431583947995e-06, "loss": 2.5559, "step": 5260 }, { "epoch": 1.7931152010906612, "grad_norm": 17.56233568179384, "learning_rate": 4.1738872699761016e-06, "loss": 2.2898, "step": 5261 }, { "epoch": 1.7934560327198366, "grad_norm": 12.724758303865363, "learning_rate": 4.171931511508227e-06, "loss": 1.4939, "step": 5262 }, { "epoch": 1.7937968643490114, "grad_norm": 20.873635901506322, "learning_rate": 4.169975883298822e-06, "loss": 2.1963, "step": 5263 }, { "epoch": 1.7941376959781867, "grad_norm": 13.385316658502127, "learning_rate": 4.1680203856555115e-06, "loss": 1.8913, "step": 5264 }, { "epoch": 1.794478527607362, "grad_norm": 17.791482461741296, "learning_rate": 4.166065018885909e-06, "loss": 2.0954, "step": 5265 }, { "epoch": 1.7948193592365371, "grad_norm": 20.332298349375968, "learning_rate": 4.164109783297598e-06, "loss": 2.1327, "step": 5266 }, { "epoch": 1.7951601908657122, "grad_norm": 17.150149802514125, "learning_rate": 4.162154679198145e-06, "loss": 2.0578, "step": 5267 }, { "epoch": 1.7955010224948875, "grad_norm": 20.453349315939715, "learning_rate": 4.160199706895098e-06, "loss": 2.3716, "step": 5268 }, { "epoch": 1.7958418541240628, "grad_norm": 20.471220755625584, "learning_rate": 4.158244866695978e-06, "loss": 2.6919, "step": 5269 }, { "epoch": 1.796182685753238, "grad_norm": 12.811644309882633, "learning_rate": 4.156290158908292e-06, "loss": 1.5699, "step": 5270 }, { "epoch": 1.796523517382413, "grad_norm": 32.259828767005, "learning_rate": 4.154335583839524e-06, "loss": 2.866, "step": 5271 }, { "epoch": 1.7968643490115883, "grad_norm": 16.808700322592195, "learning_rate": 4.152381141797135e-06, "loss": 2.412, "step": 5272 }, { "epoch": 1.7972051806407636, "grad_norm": 15.82385120190494, "learning_rate": 4.1504268330885636e-06, "loss": 2.6066, "step": 5273 }, { "epoch": 1.7975460122699385, "grad_norm": 19.488692851969926, "learning_rate": 4.148472658021235e-06, "loss": 1.7622, "step": 5274 }, { "epoch": 1.7978868438991138, "grad_norm": 11.113578617486324, "learning_rate": 4.146518616902548e-06, "loss": 1.9224, "step": 5275 }, { "epoch": 1.798227675528289, "grad_norm": 18.41089840986063, "learning_rate": 4.1445647100398785e-06, "loss": 2.8304, "step": 5276 }, { "epoch": 1.7985685071574642, "grad_norm": 13.703152454797904, "learning_rate": 4.142610937740587e-06, "loss": 2.6544, "step": 5277 }, { "epoch": 1.7989093387866393, "grad_norm": 26.15031131142314, "learning_rate": 4.140657300312008e-06, "loss": 2.7237, "step": 5278 }, { "epoch": 1.7992501704158146, "grad_norm": 12.447822787596733, "learning_rate": 4.138703798061454e-06, "loss": 2.0417, "step": 5279 }, { "epoch": 1.79959100204499, "grad_norm": 15.319689822618809, "learning_rate": 4.136750431296225e-06, "loss": 1.8598, "step": 5280 }, { "epoch": 1.799931833674165, "grad_norm": 10.990436589457122, "learning_rate": 4.1347972003235876e-06, "loss": 2.2373, "step": 5281 }, { "epoch": 1.80027266530334, "grad_norm": 17.667099407811865, "learning_rate": 4.132844105450795e-06, "loss": 2.2733, "step": 5282 }, { "epoch": 1.8006134969325154, "grad_norm": 13.289553343281133, "learning_rate": 4.130891146985075e-06, "loss": 2.3071, "step": 5283 }, { "epoch": 1.8009543285616907, "grad_norm": 18.135636208020195, "learning_rate": 4.128938325233639e-06, "loss": 2.42, "step": 5284 }, { "epoch": 1.8012951601908656, "grad_norm": 15.821410389087161, "learning_rate": 4.126985640503672e-06, "loss": 2.0587, "step": 5285 }, { "epoch": 1.8016359918200409, "grad_norm": 15.786537814962793, "learning_rate": 4.125033093102338e-06, "loss": 2.5444, "step": 5286 }, { "epoch": 1.8019768234492162, "grad_norm": 18.55302424270161, "learning_rate": 4.123080683336782e-06, "loss": 2.6826, "step": 5287 }, { "epoch": 1.8023176550783913, "grad_norm": 15.181797071750413, "learning_rate": 4.121128411514126e-06, "loss": 2.1908, "step": 5288 }, { "epoch": 1.8026584867075663, "grad_norm": 16.44077128252316, "learning_rate": 4.1191762779414675e-06, "loss": 2.0888, "step": 5289 }, { "epoch": 1.8029993183367417, "grad_norm": 19.120082420509107, "learning_rate": 4.117224282925888e-06, "loss": 1.6726, "step": 5290 }, { "epoch": 1.803340149965917, "grad_norm": 20.112126824679056, "learning_rate": 4.115272426774445e-06, "loss": 2.3512, "step": 5291 }, { "epoch": 1.803680981595092, "grad_norm": 13.068415366263507, "learning_rate": 4.113320709794168e-06, "loss": 2.2284, "step": 5292 }, { "epoch": 1.8040218132242671, "grad_norm": 20.330160940644305, "learning_rate": 4.111369132292074e-06, "loss": 2.2363, "step": 5293 }, { "epoch": 1.8043626448534424, "grad_norm": 20.107232059657008, "learning_rate": 4.109417694575154e-06, "loss": 2.5534, "step": 5294 }, { "epoch": 1.8047034764826178, "grad_norm": 17.293733111459435, "learning_rate": 4.107466396950373e-06, "loss": 2.1371, "step": 5295 }, { "epoch": 1.8050443081117926, "grad_norm": 21.623254958576943, "learning_rate": 4.105515239724685e-06, "loss": 1.9963, "step": 5296 }, { "epoch": 1.805385139740968, "grad_norm": 14.790078173493932, "learning_rate": 4.1035642232050086e-06, "loss": 2.5188, "step": 5297 }, { "epoch": 1.8057259713701432, "grad_norm": 15.860063494542196, "learning_rate": 4.101613347698247e-06, "loss": 2.681, "step": 5298 }, { "epoch": 1.8060668029993183, "grad_norm": 15.588952323625755, "learning_rate": 4.099662613511282e-06, "loss": 1.6576, "step": 5299 }, { "epoch": 1.8064076346284934, "grad_norm": 21.686229632690182, "learning_rate": 4.097712020950975e-06, "loss": 2.1635, "step": 5300 }, { "epoch": 1.8067484662576687, "grad_norm": 13.495327285478396, "learning_rate": 4.095761570324156e-06, "loss": 2.0185, "step": 5301 }, { "epoch": 1.807089297886844, "grad_norm": 22.86670268439134, "learning_rate": 4.093811261937643e-06, "loss": 2.6338, "step": 5302 }, { "epoch": 1.8074301295160191, "grad_norm": 23.482154991723245, "learning_rate": 4.091861096098225e-06, "loss": 1.6411, "step": 5303 }, { "epoch": 1.8077709611451942, "grad_norm": 18.43740163414355, "learning_rate": 4.089911073112671e-06, "loss": 2.4895, "step": 5304 }, { "epoch": 1.8081117927743695, "grad_norm": 19.45332632423275, "learning_rate": 4.087961193287731e-06, "loss": 2.6011, "step": 5305 }, { "epoch": 1.8084526244035446, "grad_norm": 14.951846733006658, "learning_rate": 4.086011456930125e-06, "loss": 2.9102, "step": 5306 }, { "epoch": 1.8087934560327197, "grad_norm": 18.768173228768614, "learning_rate": 4.084061864346553e-06, "loss": 2.3317, "step": 5307 }, { "epoch": 1.809134287661895, "grad_norm": 25.17300467209956, "learning_rate": 4.082112415843697e-06, "loss": 2.1809, "step": 5308 }, { "epoch": 1.8094751192910703, "grad_norm": 23.842191094081535, "learning_rate": 4.080163111728213e-06, "loss": 2.1674, "step": 5309 }, { "epoch": 1.8098159509202454, "grad_norm": 100.1505503005152, "learning_rate": 4.07821395230673e-06, "loss": 2.4359, "step": 5310 }, { "epoch": 1.8101567825494205, "grad_norm": 13.211667676951642, "learning_rate": 4.076264937885865e-06, "loss": 2.087, "step": 5311 }, { "epoch": 1.8104976141785958, "grad_norm": 21.65860853053618, "learning_rate": 4.074316068772201e-06, "loss": 2.0583, "step": 5312 }, { "epoch": 1.810838445807771, "grad_norm": 16.819686289794454, "learning_rate": 4.072367345272303e-06, "loss": 1.9439, "step": 5313 }, { "epoch": 1.8111792774369462, "grad_norm": 17.128048975832634, "learning_rate": 4.070418767692718e-06, "loss": 2.4549, "step": 5314 }, { "epoch": 1.8115201090661213, "grad_norm": 22.152637723928414, "learning_rate": 4.0684703363399584e-06, "loss": 1.7793, "step": 5315 }, { "epoch": 1.8118609406952966, "grad_norm": 18.510399068108867, "learning_rate": 4.066522051520525e-06, "loss": 2.3308, "step": 5316 }, { "epoch": 1.8122017723244717, "grad_norm": 15.957078602891515, "learning_rate": 4.064573913540887e-06, "loss": 2.5012, "step": 5317 }, { "epoch": 1.8125426039536467, "grad_norm": 18.010431511342592, "learning_rate": 4.0626259227074975e-06, "loss": 2.3271, "step": 5318 }, { "epoch": 1.812883435582822, "grad_norm": 18.390821511479828, "learning_rate": 4.060678079326781e-06, "loss": 1.9537, "step": 5319 }, { "epoch": 1.8132242672119974, "grad_norm": 18.167934908606124, "learning_rate": 4.058730383705141e-06, "loss": 2.2359, "step": 5320 }, { "epoch": 1.8135650988411725, "grad_norm": 11.040832987567578, "learning_rate": 4.05678283614896e-06, "loss": 2.3592, "step": 5321 }, { "epoch": 1.8139059304703475, "grad_norm": 21.515480901682555, "learning_rate": 4.054835436964595e-06, "loss": 2.8566, "step": 5322 }, { "epoch": 1.8142467620995228, "grad_norm": 27.822219848635935, "learning_rate": 4.052888186458375e-06, "loss": 2.5675, "step": 5323 }, { "epoch": 1.8145875937286982, "grad_norm": 18.86849546759588, "learning_rate": 4.050941084936615e-06, "loss": 2.0854, "step": 5324 }, { "epoch": 1.8149284253578732, "grad_norm": 36.0872047116249, "learning_rate": 4.048994132705601e-06, "loss": 2.5225, "step": 5325 }, { "epoch": 1.8152692569870483, "grad_norm": 15.116708485695336, "learning_rate": 4.047047330071594e-06, "loss": 1.5788, "step": 5326 }, { "epoch": 1.8156100886162236, "grad_norm": 14.13487345607294, "learning_rate": 4.045100677340838e-06, "loss": 2.3415, "step": 5327 }, { "epoch": 1.8159509202453987, "grad_norm": 23.224817518452195, "learning_rate": 4.043154174819545e-06, "loss": 2.286, "step": 5328 }, { "epoch": 1.8162917518745738, "grad_norm": 15.198661860652349, "learning_rate": 4.041207822813908e-06, "loss": 1.9213, "step": 5329 }, { "epoch": 1.8166325835037491, "grad_norm": 13.078996283781139, "learning_rate": 4.039261621630099e-06, "loss": 2.2641, "step": 5330 }, { "epoch": 1.8169734151329244, "grad_norm": 15.324275388222635, "learning_rate": 4.037315571574263e-06, "loss": 2.3079, "step": 5331 }, { "epoch": 1.8173142467620995, "grad_norm": 13.856035833205768, "learning_rate": 4.035369672952516e-06, "loss": 2.1398, "step": 5332 }, { "epoch": 1.8176550783912746, "grad_norm": 14.632982459192034, "learning_rate": 4.033423926070962e-06, "loss": 2.0987, "step": 5333 }, { "epoch": 1.81799591002045, "grad_norm": 25.098778173110805, "learning_rate": 4.031478331235672e-06, "loss": 2.5418, "step": 5334 }, { "epoch": 1.8183367416496252, "grad_norm": 14.583556901799609, "learning_rate": 4.029532888752694e-06, "loss": 1.6359, "step": 5335 }, { "epoch": 1.8186775732788003, "grad_norm": 13.732222614199445, "learning_rate": 4.027587598928058e-06, "loss": 2.3258, "step": 5336 }, { "epoch": 1.8190184049079754, "grad_norm": 17.62441294535624, "learning_rate": 4.025642462067763e-06, "loss": 2.3704, "step": 5337 }, { "epoch": 1.8193592365371507, "grad_norm": 11.800373942694927, "learning_rate": 4.023697478477786e-06, "loss": 2.1146, "step": 5338 }, { "epoch": 1.8197000681663258, "grad_norm": 20.54721163408385, "learning_rate": 4.021752648464085e-06, "loss": 2.2792, "step": 5339 }, { "epoch": 1.8200408997955009, "grad_norm": 18.003816649829496, "learning_rate": 4.019807972332585e-06, "loss": 2.0871, "step": 5340 }, { "epoch": 1.8203817314246762, "grad_norm": 18.262664236806977, "learning_rate": 4.017863450389192e-06, "loss": 2.8312, "step": 5341 }, { "epoch": 1.8207225630538515, "grad_norm": 13.726829527009878, "learning_rate": 4.0159190829397885e-06, "loss": 2.1684, "step": 5342 }, { "epoch": 1.8210633946830266, "grad_norm": 16.467410620860203, "learning_rate": 4.013974870290231e-06, "loss": 1.9633, "step": 5343 }, { "epoch": 1.8214042263122017, "grad_norm": 13.599438671388898, "learning_rate": 4.012030812746351e-06, "loss": 2.3141, "step": 5344 }, { "epoch": 1.821745057941377, "grad_norm": 35.59953632392444, "learning_rate": 4.010086910613957e-06, "loss": 2.5423, "step": 5345 }, { "epoch": 1.8220858895705523, "grad_norm": 14.04560300190091, "learning_rate": 4.0081431641988335e-06, "loss": 2.5366, "step": 5346 }, { "epoch": 1.8224267211997274, "grad_norm": 18.490754730151576, "learning_rate": 4.0061995738067395e-06, "loss": 2.9418, "step": 5347 }, { "epoch": 1.8227675528289025, "grad_norm": 25.713681527336583, "learning_rate": 4.004256139743406e-06, "loss": 2.4698, "step": 5348 }, { "epoch": 1.8231083844580778, "grad_norm": 15.015974191721984, "learning_rate": 4.002312862314547e-06, "loss": 2.3324, "step": 5349 }, { "epoch": 1.8234492160872529, "grad_norm": 20.8739746475346, "learning_rate": 4.000369741825847e-06, "loss": 2.5385, "step": 5350 }, { "epoch": 1.823790047716428, "grad_norm": 12.5170399684396, "learning_rate": 3.998426778582964e-06, "loss": 2.0601, "step": 5351 }, { "epoch": 1.8241308793456033, "grad_norm": 13.909624554899443, "learning_rate": 3.996483972891537e-06, "loss": 2.4, "step": 5352 }, { "epoch": 1.8244717109747786, "grad_norm": 16.10633641251959, "learning_rate": 3.994541325057176e-06, "loss": 2.3286, "step": 5353 }, { "epoch": 1.8248125426039536, "grad_norm": 17.672322496093564, "learning_rate": 3.992598835385465e-06, "loss": 2.7052, "step": 5354 }, { "epoch": 1.8251533742331287, "grad_norm": 20.859456802996498, "learning_rate": 3.99065650418197e-06, "loss": 1.9969, "step": 5355 }, { "epoch": 1.825494205862304, "grad_norm": 19.639259208324454, "learning_rate": 3.9887143317522254e-06, "loss": 2.933, "step": 5356 }, { "epoch": 1.8258350374914794, "grad_norm": 14.48038345611974, "learning_rate": 3.9867723184017395e-06, "loss": 1.5502, "step": 5357 }, { "epoch": 1.8261758691206544, "grad_norm": 21.771392410132385, "learning_rate": 3.984830464436002e-06, "loss": 2.111, "step": 5358 }, { "epoch": 1.8265167007498295, "grad_norm": 17.84277477046626, "learning_rate": 3.982888770160475e-06, "loss": 2.8489, "step": 5359 }, { "epoch": 1.8268575323790048, "grad_norm": 19.44882956433336, "learning_rate": 3.980947235880591e-06, "loss": 2.2826, "step": 5360 }, { "epoch": 1.82719836400818, "grad_norm": 18.79453260160639, "learning_rate": 3.979005861901766e-06, "loss": 2.5535, "step": 5361 }, { "epoch": 1.827539195637355, "grad_norm": 22.18749627913491, "learning_rate": 3.977064648529382e-06, "loss": 2.3494, "step": 5362 }, { "epoch": 1.8278800272665303, "grad_norm": 18.467923935594293, "learning_rate": 3.9751235960688e-06, "loss": 2.4694, "step": 5363 }, { "epoch": 1.8282208588957056, "grad_norm": 15.567054424990191, "learning_rate": 3.973182704825358e-06, "loss": 2.4521, "step": 5364 }, { "epoch": 1.8285616905248807, "grad_norm": 15.986853982274303, "learning_rate": 3.971241975104364e-06, "loss": 1.9591, "step": 5365 }, { "epoch": 1.8289025221540558, "grad_norm": 11.969094264959878, "learning_rate": 3.969301407211101e-06, "loss": 1.9309, "step": 5366 }, { "epoch": 1.829243353783231, "grad_norm": 20.78736194190897, "learning_rate": 3.9673610014508305e-06, "loss": 1.9612, "step": 5367 }, { "epoch": 1.8295841854124064, "grad_norm": 15.219682106844013, "learning_rate": 3.965420758128785e-06, "loss": 2.3161, "step": 5368 }, { "epoch": 1.8299250170415815, "grad_norm": 24.200964629801554, "learning_rate": 3.963480677550172e-06, "loss": 2.4834, "step": 5369 }, { "epoch": 1.8302658486707566, "grad_norm": 36.8067726690729, "learning_rate": 3.961540760020176e-06, "loss": 2.1758, "step": 5370 }, { "epoch": 1.830606680299932, "grad_norm": 17.944613344852602, "learning_rate": 3.959601005843952e-06, "loss": 2.3653, "step": 5371 }, { "epoch": 1.830947511929107, "grad_norm": 20.634633030620336, "learning_rate": 3.957661415326629e-06, "loss": 2.4459, "step": 5372 }, { "epoch": 1.831288343558282, "grad_norm": 15.482432723208218, "learning_rate": 3.955721988773318e-06, "loss": 2.3225, "step": 5373 }, { "epoch": 1.8316291751874574, "grad_norm": 17.755063846197466, "learning_rate": 3.953782726489094e-06, "loss": 2.7196, "step": 5374 }, { "epoch": 1.8319700068166327, "grad_norm": 18.851793265046783, "learning_rate": 3.951843628779011e-06, "loss": 1.738, "step": 5375 }, { "epoch": 1.8323108384458078, "grad_norm": 33.082844764639724, "learning_rate": 3.949904695948098e-06, "loss": 1.7723, "step": 5376 }, { "epoch": 1.8326516700749829, "grad_norm": 12.650857740735146, "learning_rate": 3.9479659283013584e-06, "loss": 1.7583, "step": 5377 }, { "epoch": 1.8329925017041582, "grad_norm": 19.21770055819301, "learning_rate": 3.946027326143763e-06, "loss": 2.7085, "step": 5378 }, { "epoch": 1.8333333333333335, "grad_norm": 28.158030330742882, "learning_rate": 3.944088889780266e-06, "loss": 2.2155, "step": 5379 }, { "epoch": 1.8336741649625086, "grad_norm": 18.20117206044969, "learning_rate": 3.942150619515792e-06, "loss": 2.2372, "step": 5380 }, { "epoch": 1.8340149965916837, "grad_norm": 22.04513642868806, "learning_rate": 3.9402125156552365e-06, "loss": 1.7789, "step": 5381 }, { "epoch": 1.834355828220859, "grad_norm": 12.762118438532136, "learning_rate": 3.938274578503468e-06, "loss": 1.9846, "step": 5382 }, { "epoch": 1.834696659850034, "grad_norm": 16.561370649275347, "learning_rate": 3.936336808365337e-06, "loss": 2.1372, "step": 5383 }, { "epoch": 1.8350374914792091, "grad_norm": 15.631890059305396, "learning_rate": 3.93439920554566e-06, "loss": 2.3202, "step": 5384 }, { "epoch": 1.8353783231083844, "grad_norm": 18.554455407732313, "learning_rate": 3.9324617703492275e-06, "loss": 2.021, "step": 5385 }, { "epoch": 1.8357191547375598, "grad_norm": 17.09152752168175, "learning_rate": 3.9305245030808115e-06, "loss": 1.9107, "step": 5386 }, { "epoch": 1.8360599863667348, "grad_norm": 15.734942148775465, "learning_rate": 3.928587404045147e-06, "loss": 2.0328, "step": 5387 }, { "epoch": 1.83640081799591, "grad_norm": 17.087738906950293, "learning_rate": 3.926650473546948e-06, "loss": 2.1572, "step": 5388 }, { "epoch": 1.8367416496250852, "grad_norm": 33.23585351812588, "learning_rate": 3.924713711890904e-06, "loss": 1.883, "step": 5389 }, { "epoch": 1.8370824812542605, "grad_norm": 22.60373942993587, "learning_rate": 3.922777119381674e-06, "loss": 2.0568, "step": 5390 }, { "epoch": 1.8374233128834356, "grad_norm": 16.022359033614272, "learning_rate": 3.920840696323889e-06, "loss": 2.4661, "step": 5391 }, { "epoch": 1.8377641445126107, "grad_norm": 12.327123361942673, "learning_rate": 3.918904443022159e-06, "loss": 2.6154, "step": 5392 }, { "epoch": 1.838104976141786, "grad_norm": 20.143150152931838, "learning_rate": 3.916968359781066e-06, "loss": 2.0089, "step": 5393 }, { "epoch": 1.8384458077709611, "grad_norm": 49.41621929185133, "learning_rate": 3.915032446905158e-06, "loss": 3.0612, "step": 5394 }, { "epoch": 1.8387866394001362, "grad_norm": 20.091200356454596, "learning_rate": 3.913096704698968e-06, "loss": 2.1078, "step": 5395 }, { "epoch": 1.8391274710293115, "grad_norm": 18.080879377306367, "learning_rate": 3.9111611334669916e-06, "loss": 2.247, "step": 5396 }, { "epoch": 1.8394683026584868, "grad_norm": 13.655445105830692, "learning_rate": 3.909225733513702e-06, "loss": 2.1211, "step": 5397 }, { "epoch": 1.839809134287662, "grad_norm": 17.77306055611424, "learning_rate": 3.90729050514355e-06, "loss": 1.5655, "step": 5398 }, { "epoch": 1.840149965916837, "grad_norm": 16.773601542190285, "learning_rate": 3.905355448660949e-06, "loss": 2.4743, "step": 5399 }, { "epoch": 1.8404907975460123, "grad_norm": 17.64010912929376, "learning_rate": 3.903420564370293e-06, "loss": 2.4434, "step": 5400 }, { "epoch": 1.8408316291751876, "grad_norm": 17.26033404530369, "learning_rate": 3.901485852575948e-06, "loss": 2.3027, "step": 5401 }, { "epoch": 1.8411724608043627, "grad_norm": 19.486746221207007, "learning_rate": 3.899551313582254e-06, "loss": 1.5841, "step": 5402 }, { "epoch": 1.8415132924335378, "grad_norm": 22.68635468738929, "learning_rate": 3.897616947693515e-06, "loss": 1.9942, "step": 5403 }, { "epoch": 1.841854124062713, "grad_norm": 15.109666593361581, "learning_rate": 3.89568275521402e-06, "loss": 1.9629, "step": 5404 }, { "epoch": 1.8421949556918882, "grad_norm": 22.93100245109458, "learning_rate": 3.893748736448025e-06, "loss": 2.186, "step": 5405 }, { "epoch": 1.8425357873210633, "grad_norm": 30.58765026918194, "learning_rate": 3.891814891699755e-06, "loss": 2.2841, "step": 5406 }, { "epoch": 1.8428766189502386, "grad_norm": 18.320927284719897, "learning_rate": 3.889881221273416e-06, "loss": 2.8499, "step": 5407 }, { "epoch": 1.8432174505794139, "grad_norm": 16.99630439286904, "learning_rate": 3.887947725473179e-06, "loss": 2.2095, "step": 5408 }, { "epoch": 1.843558282208589, "grad_norm": 19.80053212080568, "learning_rate": 3.886014404603191e-06, "loss": 2.8838, "step": 5409 }, { "epoch": 1.843899113837764, "grad_norm": 18.031219010643383, "learning_rate": 3.8840812589675736e-06, "loss": 1.9273, "step": 5410 }, { "epoch": 1.8442399454669394, "grad_norm": 22.53288744734064, "learning_rate": 3.882148288870416e-06, "loss": 1.8267, "step": 5411 }, { "epoch": 1.8445807770961147, "grad_norm": 19.38998088783492, "learning_rate": 3.880215494615781e-06, "loss": 2.3386, "step": 5412 }, { "epoch": 1.8449216087252895, "grad_norm": 20.29531142559162, "learning_rate": 3.878282876507706e-06, "loss": 2.3972, "step": 5413 }, { "epoch": 1.8452624403544649, "grad_norm": 17.189241274637315, "learning_rate": 3.876350434850202e-06, "loss": 2.3633, "step": 5414 }, { "epoch": 1.8456032719836402, "grad_norm": 16.894366859659815, "learning_rate": 3.874418169947249e-06, "loss": 2.1585, "step": 5415 }, { "epoch": 1.8459441036128152, "grad_norm": 20.30972150329246, "learning_rate": 3.872486082102794e-06, "loss": 2.1053, "step": 5416 }, { "epoch": 1.8462849352419903, "grad_norm": 21.02858678578101, "learning_rate": 3.870554171620768e-06, "loss": 2.7315, "step": 5417 }, { "epoch": 1.8466257668711656, "grad_norm": 18.925724526703746, "learning_rate": 3.868622438805069e-06, "loss": 2.4989, "step": 5418 }, { "epoch": 1.846966598500341, "grad_norm": 20.563277971813807, "learning_rate": 3.866690883959562e-06, "loss": 1.9685, "step": 5419 }, { "epoch": 1.847307430129516, "grad_norm": 12.325027604122488, "learning_rate": 3.8647595073880914e-06, "loss": 2.384, "step": 5420 }, { "epoch": 1.8476482617586911, "grad_norm": 10.949704752712739, "learning_rate": 3.862828309394469e-06, "loss": 1.9477, "step": 5421 }, { "epoch": 1.8479890933878664, "grad_norm": 13.40415145401447, "learning_rate": 3.8608972902824794e-06, "loss": 1.7261, "step": 5422 }, { "epoch": 1.8483299250170417, "grad_norm": 12.208993916809291, "learning_rate": 3.8589664503558825e-06, "loss": 1.8894, "step": 5423 }, { "epoch": 1.8486707566462166, "grad_norm": 15.662102525429889, "learning_rate": 3.857035789918405e-06, "loss": 1.9553, "step": 5424 }, { "epoch": 1.849011588275392, "grad_norm": 10.840932719326052, "learning_rate": 3.855105309273746e-06, "loss": 1.9964, "step": 5425 }, { "epoch": 1.8493524199045672, "grad_norm": 17.245730742598422, "learning_rate": 3.85317500872558e-06, "loss": 2.9205, "step": 5426 }, { "epoch": 1.8496932515337423, "grad_norm": 16.829081013746404, "learning_rate": 3.851244888577552e-06, "loss": 1.4885, "step": 5427 }, { "epoch": 1.8500340831629174, "grad_norm": 20.834238005745817, "learning_rate": 3.8493149491332735e-06, "loss": 2.1753, "step": 5428 }, { "epoch": 1.8503749147920927, "grad_norm": 19.974372293410116, "learning_rate": 3.847385190696336e-06, "loss": 1.7784, "step": 5429 }, { "epoch": 1.850715746421268, "grad_norm": 11.251463239238076, "learning_rate": 3.845455613570296e-06, "loss": 1.925, "step": 5430 }, { "epoch": 1.851056578050443, "grad_norm": 59.14605982847282, "learning_rate": 3.843526218058682e-06, "loss": 2.4949, "step": 5431 }, { "epoch": 1.8513974096796182, "grad_norm": 20.797647069241894, "learning_rate": 3.841597004464999e-06, "loss": 2.0352, "step": 5432 }, { "epoch": 1.8517382413087935, "grad_norm": 17.855821203255736, "learning_rate": 3.839667973092719e-06, "loss": 1.989, "step": 5433 }, { "epoch": 1.8520790729379688, "grad_norm": 31.38899931925812, "learning_rate": 3.837739124245285e-06, "loss": 2.2728, "step": 5434 }, { "epoch": 1.8524199045671437, "grad_norm": 13.854555881633399, "learning_rate": 3.835810458226115e-06, "loss": 2.2048, "step": 5435 }, { "epoch": 1.852760736196319, "grad_norm": 21.11692354823943, "learning_rate": 3.833881975338594e-06, "loss": 2.5982, "step": 5436 }, { "epoch": 1.8531015678254943, "grad_norm": 15.97709005493656, "learning_rate": 3.831953675886078e-06, "loss": 2.4357, "step": 5437 }, { "epoch": 1.8534423994546694, "grad_norm": 16.937752780264564, "learning_rate": 3.8300255601718996e-06, "loss": 2.543, "step": 5438 }, { "epoch": 1.8537832310838445, "grad_norm": 19.46908120498656, "learning_rate": 3.82809762849936e-06, "loss": 2.0911, "step": 5439 }, { "epoch": 1.8541240627130198, "grad_norm": 17.583844201177495, "learning_rate": 3.826169881171726e-06, "loss": 1.7681, "step": 5440 }, { "epoch": 1.854464894342195, "grad_norm": 15.657335486424628, "learning_rate": 3.824242318492244e-06, "loss": 1.6985, "step": 5441 }, { "epoch": 1.8548057259713702, "grad_norm": 23.170581142449848, "learning_rate": 3.822314940764125e-06, "loss": 2.4023, "step": 5442 }, { "epoch": 1.8551465576005453, "grad_norm": 10.410521340717503, "learning_rate": 3.8203877482905525e-06, "loss": 1.7729, "step": 5443 }, { "epoch": 1.8554873892297206, "grad_norm": 13.083168093111494, "learning_rate": 3.818460741374686e-06, "loss": 2.3358, "step": 5444 }, { "epoch": 1.8558282208588959, "grad_norm": 19.761140240777944, "learning_rate": 3.816533920319647e-06, "loss": 2.1771, "step": 5445 }, { "epoch": 1.8561690524880707, "grad_norm": 19.84814108260719, "learning_rate": 3.8146072854285334e-06, "loss": 2.6458, "step": 5446 }, { "epoch": 1.856509884117246, "grad_norm": 18.438439881758473, "learning_rate": 3.812680837004411e-06, "loss": 1.9224, "step": 5447 }, { "epoch": 1.8568507157464214, "grad_norm": 12.66382205624291, "learning_rate": 3.8107545753503215e-06, "loss": 1.8907, "step": 5448 }, { "epoch": 1.8571915473755964, "grad_norm": 16.3787238926769, "learning_rate": 3.8088285007692723e-06, "loss": 2.8603, "step": 5449 }, { "epoch": 1.8575323790047715, "grad_norm": 17.82613604414563, "learning_rate": 3.8069026135642383e-06, "loss": 1.9485, "step": 5450 }, { "epoch": 1.8578732106339468, "grad_norm": 23.572689804199577, "learning_rate": 3.804976914038175e-06, "loss": 2.4331, "step": 5451 }, { "epoch": 1.8582140422631221, "grad_norm": 26.40614802022223, "learning_rate": 3.8030514024940004e-06, "loss": 2.4199, "step": 5452 }, { "epoch": 1.8585548738922972, "grad_norm": 20.86655132858453, "learning_rate": 3.801126079234603e-06, "loss": 2.5514, "step": 5453 }, { "epoch": 1.8588957055214723, "grad_norm": 16.493472163791374, "learning_rate": 3.799200944562848e-06, "loss": 2.6229, "step": 5454 }, { "epoch": 1.8592365371506476, "grad_norm": 13.22973088880136, "learning_rate": 3.7972759987815632e-06, "loss": 2.0509, "step": 5455 }, { "epoch": 1.8595773687798227, "grad_norm": 16.665693889711463, "learning_rate": 3.7953512421935505e-06, "loss": 2.5486, "step": 5456 }, { "epoch": 1.8599182004089978, "grad_norm": 11.325189655185142, "learning_rate": 3.7934266751015854e-06, "loss": 1.7887, "step": 5457 }, { "epoch": 1.860259032038173, "grad_norm": 18.23407797556481, "learning_rate": 3.791502297808406e-06, "loss": 2.629, "step": 5458 }, { "epoch": 1.8605998636673484, "grad_norm": 26.211388895607524, "learning_rate": 3.789578110616726e-06, "loss": 2.2025, "step": 5459 }, { "epoch": 1.8609406952965235, "grad_norm": 17.63498517150422, "learning_rate": 3.787654113829228e-06, "loss": 2.2585, "step": 5460 }, { "epoch": 1.8612815269256986, "grad_norm": 24.783256926563674, "learning_rate": 3.785730307748564e-06, "loss": 2.1737, "step": 5461 }, { "epoch": 1.861622358554874, "grad_norm": 13.151557703951811, "learning_rate": 3.7838066926773555e-06, "loss": 1.8326, "step": 5462 }, { "epoch": 1.8619631901840492, "grad_norm": 26.560757097872276, "learning_rate": 3.7818832689181956e-06, "loss": 2.0837, "step": 5463 }, { "epoch": 1.8623040218132243, "grad_norm": 18.6980271318111, "learning_rate": 3.7799600367736482e-06, "loss": 2.2287, "step": 5464 }, { "epoch": 1.8626448534423994, "grad_norm": 15.021311651804993, "learning_rate": 3.7780369965462414e-06, "loss": 2.0362, "step": 5465 }, { "epoch": 1.8629856850715747, "grad_norm": 25.322533654129792, "learning_rate": 3.7761141485384815e-06, "loss": 2.3419, "step": 5466 }, { "epoch": 1.8633265167007498, "grad_norm": 17.04701285225489, "learning_rate": 3.7741914930528372e-06, "loss": 2.3216, "step": 5467 }, { "epoch": 1.8636673483299249, "grad_norm": 20.08467943308246, "learning_rate": 3.7722690303917493e-06, "loss": 1.8963, "step": 5468 }, { "epoch": 1.8640081799591002, "grad_norm": 44.17629037827492, "learning_rate": 3.7703467608576326e-06, "loss": 2.7933, "step": 5469 }, { "epoch": 1.8643490115882755, "grad_norm": 26.887967101671023, "learning_rate": 3.768424684752865e-06, "loss": 1.7601, "step": 5470 }, { "epoch": 1.8646898432174506, "grad_norm": 18.564498926027866, "learning_rate": 3.7665028023797957e-06, "loss": 2.2128, "step": 5471 }, { "epoch": 1.8650306748466257, "grad_norm": 18.644052809913433, "learning_rate": 3.7645811140407463e-06, "loss": 2.5056, "step": 5472 }, { "epoch": 1.865371506475801, "grad_norm": 16.734606504052095, "learning_rate": 3.7626596200380073e-06, "loss": 2.3172, "step": 5473 }, { "epoch": 1.8657123381049763, "grad_norm": 15.531930133042572, "learning_rate": 3.7607383206738324e-06, "loss": 1.8331, "step": 5474 }, { "epoch": 1.8660531697341514, "grad_norm": 21.37672349743786, "learning_rate": 3.758817216250456e-06, "loss": 2.3419, "step": 5475 }, { "epoch": 1.8663940013633264, "grad_norm": 18.300953462532178, "learning_rate": 3.756896307070071e-06, "loss": 2.2766, "step": 5476 }, { "epoch": 1.8667348329925018, "grad_norm": 24.15597856189639, "learning_rate": 3.7549755934348463e-06, "loss": 1.9065, "step": 5477 }, { "epoch": 1.8670756646216768, "grad_norm": 13.484540627907016, "learning_rate": 3.7530550756469145e-06, "loss": 1.9062, "step": 5478 }, { "epoch": 1.867416496250852, "grad_norm": 18.123725643050438, "learning_rate": 3.751134754008386e-06, "loss": 2.4946, "step": 5479 }, { "epoch": 1.8677573278800272, "grad_norm": 15.410509573041058, "learning_rate": 3.7492146288213303e-06, "loss": 2.3069, "step": 5480 }, { "epoch": 1.8680981595092025, "grad_norm": 13.893625253870484, "learning_rate": 3.7472947003877924e-06, "loss": 1.9052, "step": 5481 }, { "epoch": 1.8684389911383776, "grad_norm": 17.871233625845978, "learning_rate": 3.745374969009786e-06, "loss": 1.8914, "step": 5482 }, { "epoch": 1.8687798227675527, "grad_norm": 17.622099451708582, "learning_rate": 3.7434554349892923e-06, "loss": 2.4595, "step": 5483 }, { "epoch": 1.869120654396728, "grad_norm": 15.064124254444001, "learning_rate": 3.741536098628258e-06, "loss": 2.4738, "step": 5484 }, { "epoch": 1.8694614860259033, "grad_norm": 13.780076661073135, "learning_rate": 3.739616960228606e-06, "loss": 2.1951, "step": 5485 }, { "epoch": 1.8698023176550784, "grad_norm": 20.928380804317737, "learning_rate": 3.737698020092225e-06, "loss": 2.6504, "step": 5486 }, { "epoch": 1.8701431492842535, "grad_norm": 16.76941858634153, "learning_rate": 3.735779278520968e-06, "loss": 2.1112, "step": 5487 }, { "epoch": 1.8704839809134288, "grad_norm": 32.62577829670677, "learning_rate": 3.733860735816665e-06, "loss": 2.0106, "step": 5488 }, { "epoch": 1.870824812542604, "grad_norm": 18.93530705333411, "learning_rate": 3.7319423922811076e-06, "loss": 2.3522, "step": 5489 }, { "epoch": 1.871165644171779, "grad_norm": 21.442813411104343, "learning_rate": 3.7300242482160588e-06, "loss": 1.7603, "step": 5490 }, { "epoch": 1.8715064758009543, "grad_norm": 20.179905948582636, "learning_rate": 3.728106303923253e-06, "loss": 3.0635, "step": 5491 }, { "epoch": 1.8718473074301296, "grad_norm": 17.81000144401281, "learning_rate": 3.726188559704388e-06, "loss": 1.8042, "step": 5492 }, { "epoch": 1.8721881390593047, "grad_norm": 16.617553961970074, "learning_rate": 3.7242710158611327e-06, "loss": 2.2456, "step": 5493 }, { "epoch": 1.8725289706884798, "grad_norm": 22.77100741407869, "learning_rate": 3.722353672695126e-06, "loss": 2.3352, "step": 5494 }, { "epoch": 1.872869802317655, "grad_norm": 19.270056466659234, "learning_rate": 3.7204365305079726e-06, "loss": 1.7213, "step": 5495 }, { "epoch": 1.8732106339468304, "grad_norm": 24.127906696032294, "learning_rate": 3.718519589601245e-06, "loss": 1.9903, "step": 5496 }, { "epoch": 1.8735514655760055, "grad_norm": 23.55448152214235, "learning_rate": 3.7166028502764886e-06, "loss": 2.5481, "step": 5497 }, { "epoch": 1.8738922972051806, "grad_norm": 18.057559134261453, "learning_rate": 3.7146863128352135e-06, "loss": 2.5345, "step": 5498 }, { "epoch": 1.8742331288343559, "grad_norm": 17.134444421177733, "learning_rate": 3.7127699775788954e-06, "loss": 2.4588, "step": 5499 }, { "epoch": 1.874573960463531, "grad_norm": 20.60055181645457, "learning_rate": 3.710853844808987e-06, "loss": 2.4044, "step": 5500 }, { "epoch": 1.874914792092706, "grad_norm": 15.130182563402412, "learning_rate": 3.708937914826899e-06, "loss": 1.9754, "step": 5501 }, { "epoch": 1.8752556237218814, "grad_norm": 19.587476774282692, "learning_rate": 3.707022187934015e-06, "loss": 2.0481, "step": 5502 }, { "epoch": 1.8755964553510567, "grad_norm": 15.89547911229631, "learning_rate": 3.7051066644316907e-06, "loss": 2.2115, "step": 5503 }, { "epoch": 1.8759372869802318, "grad_norm": 12.172798697848298, "learning_rate": 3.7031913446212427e-06, "loss": 1.9933, "step": 5504 }, { "epoch": 1.8762781186094069, "grad_norm": 17.40168362380706, "learning_rate": 3.701276228803956e-06, "loss": 1.9696, "step": 5505 }, { "epoch": 1.8766189502385822, "grad_norm": 14.111624279443772, "learning_rate": 3.6993613172810894e-06, "loss": 2.2558, "step": 5506 }, { "epoch": 1.8769597818677575, "grad_norm": 32.4253237521203, "learning_rate": 3.6974466103538665e-06, "loss": 2.2649, "step": 5507 }, { "epoch": 1.8773006134969326, "grad_norm": 13.69628430636695, "learning_rate": 3.695532108323477e-06, "loss": 1.728, "step": 5508 }, { "epoch": 1.8776414451261076, "grad_norm": 14.724410674287563, "learning_rate": 3.693617811491077e-06, "loss": 2.1824, "step": 5509 }, { "epoch": 1.877982276755283, "grad_norm": 14.19634359268822, "learning_rate": 3.6917037201577977e-06, "loss": 2.2161, "step": 5510 }, { "epoch": 1.878323108384458, "grad_norm": 13.850615253818324, "learning_rate": 3.6897898346247317e-06, "loss": 2.3473, "step": 5511 }, { "epoch": 1.8786639400136331, "grad_norm": 12.795678142611386, "learning_rate": 3.6878761551929383e-06, "loss": 1.7457, "step": 5512 }, { "epoch": 1.8790047716428084, "grad_norm": 30.238673527129038, "learning_rate": 3.685962682163451e-06, "loss": 2.3129, "step": 5513 }, { "epoch": 1.8793456032719837, "grad_norm": 10.346640886699666, "learning_rate": 3.6840494158372637e-06, "loss": 1.8949, "step": 5514 }, { "epoch": 1.8796864349011588, "grad_norm": 17.081333530839462, "learning_rate": 3.68213635651534e-06, "loss": 1.8719, "step": 5515 }, { "epoch": 1.880027266530334, "grad_norm": 24.15274003870877, "learning_rate": 3.6802235044986166e-06, "loss": 2.2626, "step": 5516 }, { "epoch": 1.8803680981595092, "grad_norm": 14.125268132931238, "learning_rate": 3.6783108600879885e-06, "loss": 2.3088, "step": 5517 }, { "epoch": 1.8807089297886845, "grad_norm": 21.377144442066097, "learning_rate": 3.676398423584323e-06, "loss": 2.2857, "step": 5518 }, { "epoch": 1.8810497614178596, "grad_norm": 14.241330198250836, "learning_rate": 3.6744861952884547e-06, "loss": 1.901, "step": 5519 }, { "epoch": 1.8813905930470347, "grad_norm": 16.17560137049466, "learning_rate": 3.672574175501186e-06, "loss": 2.4718, "step": 5520 }, { "epoch": 1.88173142467621, "grad_norm": 14.491959881134107, "learning_rate": 3.670662364523281e-06, "loss": 1.6325, "step": 5521 }, { "epoch": 1.882072256305385, "grad_norm": 12.618588002554587, "learning_rate": 3.6687507626554784e-06, "loss": 2.5788, "step": 5522 }, { "epoch": 1.8824130879345602, "grad_norm": 18.380739449373838, "learning_rate": 3.6668393701984828e-06, "loss": 1.769, "step": 5523 }, { "epoch": 1.8827539195637355, "grad_norm": 16.82379569706096, "learning_rate": 3.6649281874529578e-06, "loss": 1.9779, "step": 5524 }, { "epoch": 1.8830947511929108, "grad_norm": 16.67131086997878, "learning_rate": 3.6630172147195463e-06, "loss": 2.3426, "step": 5525 }, { "epoch": 1.883435582822086, "grad_norm": 18.313662072453663, "learning_rate": 3.6611064522988477e-06, "loss": 2.0197, "step": 5526 }, { "epoch": 1.883776414451261, "grad_norm": 15.345352349840233, "learning_rate": 3.6591959004914333e-06, "loss": 2.6541, "step": 5527 }, { "epoch": 1.8841172460804363, "grad_norm": 16.818335475135736, "learning_rate": 3.657285559597843e-06, "loss": 2.4203, "step": 5528 }, { "epoch": 1.8844580777096116, "grad_norm": 26.008528569933954, "learning_rate": 3.6553754299185796e-06, "loss": 2.5216, "step": 5529 }, { "epoch": 1.8847989093387867, "grad_norm": 18.415761570424372, "learning_rate": 3.6534655117541107e-06, "loss": 2.2407, "step": 5530 }, { "epoch": 1.8851397409679618, "grad_norm": 23.355329174713432, "learning_rate": 3.6515558054048788e-06, "loss": 2.0337, "step": 5531 }, { "epoch": 1.885480572597137, "grad_norm": 17.644044481262387, "learning_rate": 3.6496463111712876e-06, "loss": 2.2844, "step": 5532 }, { "epoch": 1.8858214042263122, "grad_norm": 19.748746874800066, "learning_rate": 3.647737029353705e-06, "loss": 2.1656, "step": 5533 }, { "epoch": 1.8861622358554873, "grad_norm": 18.93327421693914, "learning_rate": 3.6458279602524727e-06, "loss": 1.8169, "step": 5534 }, { "epoch": 1.8865030674846626, "grad_norm": 20.997847924488294, "learning_rate": 3.643919104167892e-06, "loss": 2.3983, "step": 5535 }, { "epoch": 1.8868438991138379, "grad_norm": 20.719673945368257, "learning_rate": 3.6420104614002326e-06, "loss": 1.994, "step": 5536 }, { "epoch": 1.887184730743013, "grad_norm": 15.474281346169347, "learning_rate": 3.640102032249737e-06, "loss": 2.1711, "step": 5537 }, { "epoch": 1.887525562372188, "grad_norm": 17.659204670184543, "learning_rate": 3.638193817016604e-06, "loss": 2.977, "step": 5538 }, { "epoch": 1.8878663940013634, "grad_norm": 19.08265176381865, "learning_rate": 3.636285816001003e-06, "loss": 2.5344, "step": 5539 }, { "epoch": 1.8882072256305387, "grad_norm": 14.856231534612647, "learning_rate": 3.634378029503073e-06, "loss": 2.2926, "step": 5540 }, { "epoch": 1.8885480572597138, "grad_norm": 15.222727051196117, "learning_rate": 3.632470457822916e-06, "loss": 2.3596, "step": 5541 }, { "epoch": 1.8888888888888888, "grad_norm": 19.889453278519966, "learning_rate": 3.6305631012605997e-06, "loss": 2.485, "step": 5542 }, { "epoch": 1.8892297205180641, "grad_norm": 16.80307559385271, "learning_rate": 3.628655960116157e-06, "loss": 2.8447, "step": 5543 }, { "epoch": 1.8895705521472392, "grad_norm": 19.5781477951071, "learning_rate": 3.6267490346895917e-06, "loss": 2.6112, "step": 5544 }, { "epoch": 1.8899113837764143, "grad_norm": 21.382007540252403, "learning_rate": 3.624842325280871e-06, "loss": 2.7581, "step": 5545 }, { "epoch": 1.8902522154055896, "grad_norm": 20.119814009338796, "learning_rate": 3.6229358321899243e-06, "loss": 2.2778, "step": 5546 }, { "epoch": 1.890593047034765, "grad_norm": 20.176608153244405, "learning_rate": 3.621029555716656e-06, "loss": 2.4865, "step": 5547 }, { "epoch": 1.89093387866394, "grad_norm": 17.949145233942073, "learning_rate": 3.619123496160926e-06, "loss": 2.4204, "step": 5548 }, { "epoch": 1.8912747102931151, "grad_norm": 19.030555249930458, "learning_rate": 3.6172176538225667e-06, "loss": 2.2029, "step": 5549 }, { "epoch": 1.8916155419222904, "grad_norm": 12.279642804269457, "learning_rate": 3.6153120290013767e-06, "loss": 2.2854, "step": 5550 }, { "epoch": 1.8919563735514657, "grad_norm": 21.713804143441553, "learning_rate": 3.6134066219971155e-06, "loss": 1.8023, "step": 5551 }, { "epoch": 1.8922972051806408, "grad_norm": 15.983897393054429, "learning_rate": 3.6115014331095113e-06, "loss": 2.6156, "step": 5552 }, { "epoch": 1.892638036809816, "grad_norm": 20.778649932518086, "learning_rate": 3.6095964626382607e-06, "loss": 2.2898, "step": 5553 }, { "epoch": 1.8929788684389912, "grad_norm": 21.594828585515383, "learning_rate": 3.607691710883022e-06, "loss": 2.0296, "step": 5554 }, { "epoch": 1.8933197000681663, "grad_norm": 20.228634313644406, "learning_rate": 3.6057871781434172e-06, "loss": 2.4109, "step": 5555 }, { "epoch": 1.8936605316973414, "grad_norm": 20.211987189810245, "learning_rate": 3.6038828647190404e-06, "loss": 2.3998, "step": 5556 }, { "epoch": 1.8940013633265167, "grad_norm": 15.105849218354074, "learning_rate": 3.6019787709094485e-06, "loss": 2.5004, "step": 5557 }, { "epoch": 1.894342194955692, "grad_norm": 26.850784959219894, "learning_rate": 3.600074897014158e-06, "loss": 2.301, "step": 5558 }, { "epoch": 1.894683026584867, "grad_norm": 16.479261936083503, "learning_rate": 3.5981712433326617e-06, "loss": 1.5512, "step": 5559 }, { "epoch": 1.8950238582140422, "grad_norm": 18.11122479618633, "learning_rate": 3.596267810164408e-06, "loss": 1.7308, "step": 5560 }, { "epoch": 1.8953646898432175, "grad_norm": 17.06471092515031, "learning_rate": 3.5943645978088145e-06, "loss": 2.3743, "step": 5561 }, { "epoch": 1.8957055214723928, "grad_norm": 20.77541465772056, "learning_rate": 3.592461606565267e-06, "loss": 2.5879, "step": 5562 }, { "epoch": 1.8960463531015677, "grad_norm": 18.053744649087598, "learning_rate": 3.5905588367331123e-06, "loss": 2.0665, "step": 5563 }, { "epoch": 1.896387184730743, "grad_norm": 22.91768731667087, "learning_rate": 3.588656288611661e-06, "loss": 2.7063, "step": 5564 }, { "epoch": 1.8967280163599183, "grad_norm": 14.446698628296824, "learning_rate": 3.5867539625001947e-06, "loss": 1.9811, "step": 5565 }, { "epoch": 1.8970688479890934, "grad_norm": 19.599639879044183, "learning_rate": 3.5848518586979575e-06, "loss": 2.431, "step": 5566 }, { "epoch": 1.8974096796182685, "grad_norm": 16.584948141070257, "learning_rate": 3.5829499775041535e-06, "loss": 2.153, "step": 5567 }, { "epoch": 1.8977505112474438, "grad_norm": 16.63980227457862, "learning_rate": 3.5810483192179616e-06, "loss": 2.5495, "step": 5568 }, { "epoch": 1.898091342876619, "grad_norm": 16.379059629990945, "learning_rate": 3.579146884138517e-06, "loss": 2.2716, "step": 5569 }, { "epoch": 1.8984321745057942, "grad_norm": 16.738709589305877, "learning_rate": 3.5772456725649213e-06, "loss": 2.0358, "step": 5570 }, { "epoch": 1.8987730061349692, "grad_norm": 17.536505857444812, "learning_rate": 3.575344684796248e-06, "loss": 2.8603, "step": 5571 }, { "epoch": 1.8991138377641446, "grad_norm": 15.617976006516926, "learning_rate": 3.5734439211315264e-06, "loss": 1.8207, "step": 5572 }, { "epoch": 1.8994546693933199, "grad_norm": 17.811819914602616, "learning_rate": 3.5715433818697543e-06, "loss": 2.0093, "step": 5573 }, { "epoch": 1.8997955010224947, "grad_norm": 21.128101082923994, "learning_rate": 3.569643067309893e-06, "loss": 2.2194, "step": 5574 }, { "epoch": 1.90013633265167, "grad_norm": 20.555240301422977, "learning_rate": 3.5677429777508736e-06, "loss": 2.7883, "step": 5575 }, { "epoch": 1.9004771642808453, "grad_norm": 15.053221908211968, "learning_rate": 3.5658431134915843e-06, "loss": 1.6976, "step": 5576 }, { "epoch": 1.9008179959100204, "grad_norm": 15.855010866354467, "learning_rate": 3.563943474830882e-06, "loss": 2.4734, "step": 5577 }, { "epoch": 1.9011588275391955, "grad_norm": 12.872288761351575, "learning_rate": 3.5620440620675876e-06, "loss": 1.9302, "step": 5578 }, { "epoch": 1.9014996591683708, "grad_norm": 20.448171569369762, "learning_rate": 3.5601448755004888e-06, "loss": 2.4691, "step": 5579 }, { "epoch": 1.9018404907975461, "grad_norm": 16.73904471512243, "learning_rate": 3.5582459154283288e-06, "loss": 2.0307, "step": 5580 }, { "epoch": 1.9021813224267212, "grad_norm": 16.67133662199536, "learning_rate": 3.5563471821498274e-06, "loss": 2.299, "step": 5581 }, { "epoch": 1.9025221540558963, "grad_norm": 19.915230317972554, "learning_rate": 3.5544486759636625e-06, "loss": 2.3992, "step": 5582 }, { "epoch": 1.9028629856850716, "grad_norm": 20.797949783204267, "learning_rate": 3.5525503971684728e-06, "loss": 2.6365, "step": 5583 }, { "epoch": 1.903203817314247, "grad_norm": 20.91887378369376, "learning_rate": 3.55065234606287e-06, "loss": 1.9854, "step": 5584 }, { "epoch": 1.9035446489434218, "grad_norm": 20.268762795144838, "learning_rate": 3.548754522945421e-06, "loss": 2.4453, "step": 5585 }, { "epoch": 1.903885480572597, "grad_norm": 17.6624068616757, "learning_rate": 3.5468569281146623e-06, "loss": 2.7625, "step": 5586 }, { "epoch": 1.9042263122017724, "grad_norm": 15.114643944708346, "learning_rate": 3.5449595618690957e-06, "loss": 1.7075, "step": 5587 }, { "epoch": 1.9045671438309475, "grad_norm": 27.829708912139864, "learning_rate": 3.5430624245071814e-06, "loss": 2.5964, "step": 5588 }, { "epoch": 1.9049079754601226, "grad_norm": 25.7360027561855, "learning_rate": 3.541165516327346e-06, "loss": 2.3428, "step": 5589 }, { "epoch": 1.905248807089298, "grad_norm": 26.908220138903317, "learning_rate": 3.5392688376279827e-06, "loss": 2.603, "step": 5590 }, { "epoch": 1.9055896387184732, "grad_norm": 16.98849905257533, "learning_rate": 3.5373723887074474e-06, "loss": 1.9958, "step": 5591 }, { "epoch": 1.9059304703476483, "grad_norm": 20.544282178375937, "learning_rate": 3.5354761698640555e-06, "loss": 2.5946, "step": 5592 }, { "epoch": 1.9062713019768234, "grad_norm": 18.079028959040244, "learning_rate": 3.5335801813960946e-06, "loss": 2.2374, "step": 5593 }, { "epoch": 1.9066121336059987, "grad_norm": 22.84246611095971, "learning_rate": 3.5316844236018067e-06, "loss": 1.9726, "step": 5594 }, { "epoch": 1.9069529652351738, "grad_norm": 14.839983385623013, "learning_rate": 3.5297888967794037e-06, "loss": 2.7813, "step": 5595 }, { "epoch": 1.9072937968643489, "grad_norm": 16.65762275551025, "learning_rate": 3.5278936012270613e-06, "loss": 2.6856, "step": 5596 }, { "epoch": 1.9076346284935242, "grad_norm": 20.86005673205567, "learning_rate": 3.5259985372429162e-06, "loss": 1.8478, "step": 5597 }, { "epoch": 1.9079754601226995, "grad_norm": 21.863554416652864, "learning_rate": 3.5241037051250666e-06, "loss": 1.9022, "step": 5598 }, { "epoch": 1.9083162917518746, "grad_norm": 16.78819145312999, "learning_rate": 3.5222091051715803e-06, "loss": 2.437, "step": 5599 }, { "epoch": 1.9086571233810496, "grad_norm": 18.73597889820278, "learning_rate": 3.520314737680485e-06, "loss": 2.1715, "step": 5600 }, { "epoch": 1.908997955010225, "grad_norm": 18.424780847321266, "learning_rate": 3.5184206029497705e-06, "loss": 2.3903, "step": 5601 }, { "epoch": 1.9093387866394003, "grad_norm": 13.974155858847043, "learning_rate": 3.516526701277395e-06, "loss": 1.7176, "step": 5602 }, { "epoch": 1.9096796182685754, "grad_norm": 14.48317957698722, "learning_rate": 3.5146330329612733e-06, "loss": 2.1846, "step": 5603 }, { "epoch": 1.9100204498977504, "grad_norm": 22.94630302878094, "learning_rate": 3.5127395982992874e-06, "loss": 2.1717, "step": 5604 }, { "epoch": 1.9103612815269257, "grad_norm": 16.164038544585825, "learning_rate": 3.5108463975892855e-06, "loss": 2.0537, "step": 5605 }, { "epoch": 1.9107021131561008, "grad_norm": 13.963995110630957, "learning_rate": 3.508953431129073e-06, "loss": 1.9119, "step": 5606 }, { "epoch": 1.911042944785276, "grad_norm": 18.71787540881993, "learning_rate": 3.5070606992164204e-06, "loss": 2.1594, "step": 5607 }, { "epoch": 1.9113837764144512, "grad_norm": 17.517107753734784, "learning_rate": 3.5051682021490617e-06, "loss": 1.9308, "step": 5608 }, { "epoch": 1.9117246080436265, "grad_norm": 12.663197458557725, "learning_rate": 3.5032759402246975e-06, "loss": 1.8802, "step": 5609 }, { "epoch": 1.9120654396728016, "grad_norm": 14.206608682014092, "learning_rate": 3.5013839137409857e-06, "loss": 2.1909, "step": 5610 }, { "epoch": 1.9124062713019767, "grad_norm": 18.48727210669149, "learning_rate": 3.4994921229955476e-06, "loss": 2.0807, "step": 5611 }, { "epoch": 1.912747102931152, "grad_norm": 20.409113496644782, "learning_rate": 3.497600568285975e-06, "loss": 2.0953, "step": 5612 }, { "epoch": 1.9130879345603273, "grad_norm": 23.568047119844575, "learning_rate": 3.4957092499098123e-06, "loss": 3.1201, "step": 5613 }, { "epoch": 1.9134287661895024, "grad_norm": 12.678549786480689, "learning_rate": 3.4938181681645723e-06, "loss": 2.0361, "step": 5614 }, { "epoch": 1.9137695978186775, "grad_norm": 17.456280837494003, "learning_rate": 3.4919273233477304e-06, "loss": 2.1769, "step": 5615 }, { "epoch": 1.9141104294478528, "grad_norm": 17.747463086592244, "learning_rate": 3.490036715756725e-06, "loss": 1.7644, "step": 5616 }, { "epoch": 1.914451261077028, "grad_norm": 18.88619553059399, "learning_rate": 3.4881463456889524e-06, "loss": 2.1973, "step": 5617 }, { "epoch": 1.914792092706203, "grad_norm": 13.806710656722867, "learning_rate": 3.486256213441781e-06, "loss": 2.3237, "step": 5618 }, { "epoch": 1.9151329243353783, "grad_norm": 14.966256240260128, "learning_rate": 3.484366319312531e-06, "loss": 2.5289, "step": 5619 }, { "epoch": 1.9154737559645536, "grad_norm": 15.722141765536367, "learning_rate": 3.4824766635984917e-06, "loss": 2.1837, "step": 5620 }, { "epoch": 1.9158145875937287, "grad_norm": 12.450166107828991, "learning_rate": 3.480587246596917e-06, "loss": 1.8585, "step": 5621 }, { "epoch": 1.9161554192229038, "grad_norm": 17.22469050739553, "learning_rate": 3.4786980686050166e-06, "loss": 2.5615, "step": 5622 }, { "epoch": 1.916496250852079, "grad_norm": 17.15954296891523, "learning_rate": 3.4768091299199636e-06, "loss": 2.5441, "step": 5623 }, { "epoch": 1.9168370824812544, "grad_norm": 20.68934956734518, "learning_rate": 3.4749204308388982e-06, "loss": 2.1294, "step": 5624 }, { "epoch": 1.9171779141104295, "grad_norm": 21.755049329096945, "learning_rate": 3.4730319716589222e-06, "loss": 1.9404, "step": 5625 }, { "epoch": 1.9175187457396046, "grad_norm": 18.68085217194838, "learning_rate": 3.471143752677093e-06, "loss": 2.7943, "step": 5626 }, { "epoch": 1.9178595773687799, "grad_norm": 23.79516780964141, "learning_rate": 3.46925577419044e-06, "loss": 1.9808, "step": 5627 }, { "epoch": 1.918200408997955, "grad_norm": 17.84499092158308, "learning_rate": 3.4673680364959455e-06, "loss": 2.1692, "step": 5628 }, { "epoch": 1.91854124062713, "grad_norm": 22.552971826758533, "learning_rate": 3.4654805398905594e-06, "loss": 2.4174, "step": 5629 }, { "epoch": 1.9188820722563054, "grad_norm": 18.470612978652458, "learning_rate": 3.4635932846711952e-06, "loss": 2.3553, "step": 5630 }, { "epoch": 1.9192229038854807, "grad_norm": 14.860869063311448, "learning_rate": 3.4617062711347225e-06, "loss": 1.8596, "step": 5631 }, { "epoch": 1.9195637355146558, "grad_norm": 18.243754193451856, "learning_rate": 3.4598194995779755e-06, "loss": 2.7957, "step": 5632 }, { "epoch": 1.9199045671438308, "grad_norm": 16.989146862003317, "learning_rate": 3.4579329702977536e-06, "loss": 2.7325, "step": 5633 }, { "epoch": 1.9202453987730062, "grad_norm": 22.62361816317594, "learning_rate": 3.4560466835908144e-06, "loss": 2.6324, "step": 5634 }, { "epoch": 1.9205862304021815, "grad_norm": 26.422617044343443, "learning_rate": 3.4541606397538766e-06, "loss": 2.3887, "step": 5635 }, { "epoch": 1.9209270620313565, "grad_norm": 19.34323904201938, "learning_rate": 3.4522748390836257e-06, "loss": 2.6277, "step": 5636 }, { "epoch": 1.9212678936605316, "grad_norm": 13.792169274596347, "learning_rate": 3.450389281876703e-06, "loss": 1.7431, "step": 5637 }, { "epoch": 1.921608725289707, "grad_norm": 39.34466237148315, "learning_rate": 3.4485039684297156e-06, "loss": 1.843, "step": 5638 }, { "epoch": 1.921949556918882, "grad_norm": 18.347517342433257, "learning_rate": 3.446618899039228e-06, "loss": 2.0572, "step": 5639 }, { "epoch": 1.9222903885480571, "grad_norm": 15.081028577185485, "learning_rate": 3.444734074001772e-06, "loss": 2.2265, "step": 5640 }, { "epoch": 1.9226312201772324, "grad_norm": 11.378503127785025, "learning_rate": 3.442849493613839e-06, "loss": 1.978, "step": 5641 }, { "epoch": 1.9229720518064077, "grad_norm": 12.591626126915639, "learning_rate": 3.440965158171877e-06, "loss": 1.7706, "step": 5642 }, { "epoch": 1.9233128834355828, "grad_norm": 24.208533703360317, "learning_rate": 3.4390810679723043e-06, "loss": 2.4827, "step": 5643 }, { "epoch": 1.923653715064758, "grad_norm": 18.11515509963514, "learning_rate": 3.4371972233114914e-06, "loss": 2.7296, "step": 5644 }, { "epoch": 1.9239945466939332, "grad_norm": 19.735214072388686, "learning_rate": 3.4353136244857753e-06, "loss": 1.7945, "step": 5645 }, { "epoch": 1.9243353783231085, "grad_norm": 15.911458076153854, "learning_rate": 3.433430271791457e-06, "loss": 1.9612, "step": 5646 }, { "epoch": 1.9246762099522836, "grad_norm": 14.144371171976402, "learning_rate": 3.4315471655247932e-06, "loss": 2.1085, "step": 5647 }, { "epoch": 1.9250170415814587, "grad_norm": 16.37502457838329, "learning_rate": 3.429664305982002e-06, "loss": 2.0092, "step": 5648 }, { "epoch": 1.925357873210634, "grad_norm": 16.478007238207738, "learning_rate": 3.4277816934592676e-06, "loss": 2.0521, "step": 5649 }, { "epoch": 1.925698704839809, "grad_norm": 18.450154291478864, "learning_rate": 3.4258993282527332e-06, "loss": 2.3042, "step": 5650 }, { "epoch": 1.9260395364689842, "grad_norm": 16.753240710163613, "learning_rate": 3.4240172106584975e-06, "loss": 2.6172, "step": 5651 }, { "epoch": 1.9263803680981595, "grad_norm": 16.10825816059553, "learning_rate": 3.4221353409726303e-06, "loss": 2.3384, "step": 5652 }, { "epoch": 1.9267211997273348, "grad_norm": 16.758206491052754, "learning_rate": 3.4202537194911546e-06, "loss": 2.437, "step": 5653 }, { "epoch": 1.9270620313565099, "grad_norm": 14.793720060978977, "learning_rate": 3.418372346510056e-06, "loss": 2.1786, "step": 5654 }, { "epoch": 1.927402862985685, "grad_norm": 18.496545379992753, "learning_rate": 3.4164912223252856e-06, "loss": 2.3196, "step": 5655 }, { "epoch": 1.9277436946148603, "grad_norm": 15.178406616159545, "learning_rate": 3.4146103472327497e-06, "loss": 2.1414, "step": 5656 }, { "epoch": 1.9280845262440356, "grad_norm": 16.326226236074, "learning_rate": 3.4127297215283144e-06, "loss": 2.186, "step": 5657 }, { "epoch": 1.9284253578732107, "grad_norm": 41.26069322594157, "learning_rate": 3.410849345507814e-06, "loss": 2.7666, "step": 5658 }, { "epoch": 1.9287661895023858, "grad_norm": 14.90282409509364, "learning_rate": 3.408969219467039e-06, "loss": 2.293, "step": 5659 }, { "epoch": 1.929107021131561, "grad_norm": 20.012160804395556, "learning_rate": 3.4070893437017367e-06, "loss": 1.6989, "step": 5660 }, { "epoch": 1.9294478527607362, "grad_norm": 21.18850908428512, "learning_rate": 3.405209718507624e-06, "loss": 1.8225, "step": 5661 }, { "epoch": 1.9297886843899112, "grad_norm": 23.10664377633577, "learning_rate": 3.4033303441803702e-06, "loss": 2.3421, "step": 5662 }, { "epoch": 1.9301295160190866, "grad_norm": 20.947637106742405, "learning_rate": 3.4014512210156082e-06, "loss": 2.4207, "step": 5663 }, { "epoch": 1.9304703476482619, "grad_norm": 18.99150168444652, "learning_rate": 3.399572349308934e-06, "loss": 2.1143, "step": 5664 }, { "epoch": 1.930811179277437, "grad_norm": 23.28858107985881, "learning_rate": 3.397693729355901e-06, "loss": 1.8742, "step": 5665 }, { "epoch": 1.931152010906612, "grad_norm": 17.94964050659806, "learning_rate": 3.3958153614520194e-06, "loss": 1.9081, "step": 5666 }, { "epoch": 1.9314928425357873, "grad_norm": 20.637905578879224, "learning_rate": 3.39393724589277e-06, "loss": 1.6868, "step": 5667 }, { "epoch": 1.9318336741649627, "grad_norm": 16.446261266672416, "learning_rate": 3.3920593829735855e-06, "loss": 2.0212, "step": 5668 }, { "epoch": 1.9321745057941377, "grad_norm": 17.54566547904794, "learning_rate": 3.390181772989859e-06, "loss": 2.4183, "step": 5669 }, { "epoch": 1.9325153374233128, "grad_norm": 14.974031377006112, "learning_rate": 3.3883044162369483e-06, "loss": 2.0128, "step": 5670 }, { "epoch": 1.9328561690524881, "grad_norm": 14.271743492252666, "learning_rate": 3.3864273130101705e-06, "loss": 1.8295, "step": 5671 }, { "epoch": 1.9331970006816632, "grad_norm": 14.244702205717381, "learning_rate": 3.3845504636047995e-06, "loss": 2.2481, "step": 5672 }, { "epoch": 1.9335378323108383, "grad_norm": 16.18297526822987, "learning_rate": 3.3826738683160685e-06, "loss": 2.2002, "step": 5673 }, { "epoch": 1.9338786639400136, "grad_norm": 19.583610417031558, "learning_rate": 3.380797527439178e-06, "loss": 2.3968, "step": 5674 }, { "epoch": 1.934219495569189, "grad_norm": 12.590362421429102, "learning_rate": 3.3789214412692833e-06, "loss": 1.7645, "step": 5675 }, { "epoch": 1.934560327198364, "grad_norm": 14.891583330713507, "learning_rate": 3.3770456101014967e-06, "loss": 2.1964, "step": 5676 }, { "epoch": 1.934901158827539, "grad_norm": 19.95567816572072, "learning_rate": 3.375170034230898e-06, "loss": 1.7691, "step": 5677 }, { "epoch": 1.9352419904567144, "grad_norm": 14.435772291560511, "learning_rate": 3.373294713952521e-06, "loss": 2.1553, "step": 5678 }, { "epoch": 1.9355828220858897, "grad_norm": 19.678758409587854, "learning_rate": 3.3714196495613584e-06, "loss": 2.2501, "step": 5679 }, { "epoch": 1.9359236537150648, "grad_norm": 25.616739424100103, "learning_rate": 3.3695448413523712e-06, "loss": 2.2277, "step": 5680 }, { "epoch": 1.93626448534424, "grad_norm": 11.715362211935195, "learning_rate": 3.3676702896204703e-06, "loss": 1.7563, "step": 5681 }, { "epoch": 1.9366053169734152, "grad_norm": 23.1252944079966, "learning_rate": 3.365795994660529e-06, "loss": 2.1098, "step": 5682 }, { "epoch": 1.9369461486025903, "grad_norm": 12.998331763938001, "learning_rate": 3.363921956767384e-06, "loss": 2.0097, "step": 5683 }, { "epoch": 1.9372869802317654, "grad_norm": 14.165779186255829, "learning_rate": 3.3620481762358292e-06, "loss": 2.1563, "step": 5684 }, { "epoch": 1.9376278118609407, "grad_norm": 16.72424741903025, "learning_rate": 3.360174653360615e-06, "loss": 2.0879, "step": 5685 }, { "epoch": 1.937968643490116, "grad_norm": 27.203504795620454, "learning_rate": 3.3583013884364567e-06, "loss": 2.1778, "step": 5686 }, { "epoch": 1.938309475119291, "grad_norm": 16.156666480343542, "learning_rate": 3.3564283817580247e-06, "loss": 1.9652, "step": 5687 }, { "epoch": 1.9386503067484662, "grad_norm": 13.606261908620045, "learning_rate": 3.35455563361995e-06, "loss": 1.7086, "step": 5688 }, { "epoch": 1.9389911383776415, "grad_norm": 20.116843998742954, "learning_rate": 3.3526831443168263e-06, "loss": 1.769, "step": 5689 }, { "epoch": 1.9393319700068168, "grad_norm": 19.301714940117613, "learning_rate": 3.350810914143201e-06, "loss": 2.6504, "step": 5690 }, { "epoch": 1.9396728016359919, "grad_norm": 19.43228678922453, "learning_rate": 3.3489389433935825e-06, "loss": 2.6698, "step": 5691 }, { "epoch": 1.940013633265167, "grad_norm": 18.42627404252224, "learning_rate": 3.347067232362442e-06, "loss": 2.5733, "step": 5692 }, { "epoch": 1.9403544648943423, "grad_norm": 12.137564369958367, "learning_rate": 3.345195781344206e-06, "loss": 1.9686, "step": 5693 }, { "epoch": 1.9406952965235174, "grad_norm": 19.09535130945397, "learning_rate": 3.3433245906332595e-06, "loss": 2.193, "step": 5694 }, { "epoch": 1.9410361281526924, "grad_norm": 14.628529685601755, "learning_rate": 3.341453660523952e-06, "loss": 1.9515, "step": 5695 }, { "epoch": 1.9413769597818678, "grad_norm": 27.419302762279617, "learning_rate": 3.339582991310584e-06, "loss": 1.9686, "step": 5696 }, { "epoch": 1.941717791411043, "grad_norm": 14.120642663211887, "learning_rate": 3.3377125832874214e-06, "loss": 1.8493, "step": 5697 }, { "epoch": 1.9420586230402181, "grad_norm": 18.197955075244902, "learning_rate": 3.3358424367486874e-06, "loss": 1.8572, "step": 5698 }, { "epoch": 1.9423994546693932, "grad_norm": 18.080344193269028, "learning_rate": 3.3339725519885622e-06, "loss": 2.2167, "step": 5699 }, { "epoch": 1.9427402862985685, "grad_norm": 15.898188572884655, "learning_rate": 3.3321029293011873e-06, "loss": 2.4496, "step": 5700 }, { "epoch": 1.9430811179277439, "grad_norm": 15.180573342837723, "learning_rate": 3.330233568980661e-06, "loss": 2.3685, "step": 5701 }, { "epoch": 1.943421949556919, "grad_norm": 11.024771534147874, "learning_rate": 3.3283644713210417e-06, "loss": 2.012, "step": 5702 }, { "epoch": 1.943762781186094, "grad_norm": 15.43241077671246, "learning_rate": 3.326495636616345e-06, "loss": 2.1581, "step": 5703 }, { "epoch": 1.9441036128152693, "grad_norm": 26.853148855737334, "learning_rate": 3.324627065160545e-06, "loss": 2.0485, "step": 5704 }, { "epoch": 1.9444444444444444, "grad_norm": 16.047938184512756, "learning_rate": 3.322758757247579e-06, "loss": 2.4596, "step": 5705 }, { "epoch": 1.9447852760736195, "grad_norm": 17.334869400196393, "learning_rate": 3.3208907131713373e-06, "loss": 2.0285, "step": 5706 }, { "epoch": 1.9451261077027948, "grad_norm": 17.47691095332133, "learning_rate": 3.3190229332256686e-06, "loss": 2.3595, "step": 5707 }, { "epoch": 1.9454669393319701, "grad_norm": 22.285971636643016, "learning_rate": 3.3171554177043847e-06, "loss": 1.8754, "step": 5708 }, { "epoch": 1.9458077709611452, "grad_norm": 17.042827274353698, "learning_rate": 3.315288166901254e-06, "loss": 2.7091, "step": 5709 }, { "epoch": 1.9461486025903203, "grad_norm": 18.585611143197024, "learning_rate": 3.313421181109999e-06, "loss": 2.578, "step": 5710 }, { "epoch": 1.9464894342194956, "grad_norm": 16.328094302866184, "learning_rate": 3.3115544606243083e-06, "loss": 2.116, "step": 5711 }, { "epoch": 1.946830265848671, "grad_norm": 21.189043588042637, "learning_rate": 3.309688005737821e-06, "loss": 1.8614, "step": 5712 }, { "epoch": 1.9471710974778458, "grad_norm": 18.16752628850933, "learning_rate": 3.3078218167441378e-06, "loss": 2.8667, "step": 5713 }, { "epoch": 1.947511929107021, "grad_norm": 23.788809618703798, "learning_rate": 3.305955893936822e-06, "loss": 2.4673, "step": 5714 }, { "epoch": 1.9478527607361964, "grad_norm": 21.50942480740741, "learning_rate": 3.304090237609387e-06, "loss": 1.3951, "step": 5715 }, { "epoch": 1.9481935923653715, "grad_norm": 15.126369984680121, "learning_rate": 3.3022248480553064e-06, "loss": 2.2645, "step": 5716 }, { "epoch": 1.9485344239945466, "grad_norm": 18.339461790788306, "learning_rate": 3.3003597255680174e-06, "loss": 2.2545, "step": 5717 }, { "epoch": 1.9488752556237219, "grad_norm": 17.034327207006395, "learning_rate": 3.2984948704409104e-06, "loss": 1.7873, "step": 5718 }, { "epoch": 1.9492160872528972, "grad_norm": 16.35926492535185, "learning_rate": 3.296630282967331e-06, "loss": 2.2263, "step": 5719 }, { "epoch": 1.9495569188820723, "grad_norm": 18.43875159462274, "learning_rate": 3.294765963440591e-06, "loss": 2.4229, "step": 5720 }, { "epoch": 1.9498977505112474, "grad_norm": 14.765969311322797, "learning_rate": 3.2929019121539516e-06, "loss": 2.184, "step": 5721 }, { "epoch": 1.9502385821404227, "grad_norm": 28.806074364653373, "learning_rate": 3.2910381294006365e-06, "loss": 2.3935, "step": 5722 }, { "epoch": 1.950579413769598, "grad_norm": 19.47064146815464, "learning_rate": 3.289174615473829e-06, "loss": 1.8824, "step": 5723 }, { "epoch": 1.9509202453987728, "grad_norm": 23.278854436042312, "learning_rate": 3.287311370666664e-06, "loss": 2.1099, "step": 5724 }, { "epoch": 1.9512610770279482, "grad_norm": 17.732278287311072, "learning_rate": 3.285448395272237e-06, "loss": 2.501, "step": 5725 }, { "epoch": 1.9516019086571235, "grad_norm": 17.833993211164263, "learning_rate": 3.283585689583604e-06, "loss": 2.5225, "step": 5726 }, { "epoch": 1.9519427402862985, "grad_norm": 12.54273241926783, "learning_rate": 3.281723253893775e-06, "loss": 1.7607, "step": 5727 }, { "epoch": 1.9522835719154736, "grad_norm": 18.947616712144374, "learning_rate": 3.279861088495716e-06, "loss": 2.0575, "step": 5728 }, { "epoch": 1.952624403544649, "grad_norm": 10.546948084053541, "learning_rate": 3.2779991936823568e-06, "loss": 2.0775, "step": 5729 }, { "epoch": 1.9529652351738243, "grad_norm": 11.413092740152146, "learning_rate": 3.2761375697465804e-06, "loss": 1.8894, "step": 5730 }, { "epoch": 1.9533060668029993, "grad_norm": 14.696943822839431, "learning_rate": 3.2742762169812236e-06, "loss": 1.884, "step": 5731 }, { "epoch": 1.9536468984321744, "grad_norm": 13.626171079282505, "learning_rate": 3.2724151356790906e-06, "loss": 2.2697, "step": 5732 }, { "epoch": 1.9539877300613497, "grad_norm": 19.9341111742677, "learning_rate": 3.2705543261329327e-06, "loss": 2.528, "step": 5733 }, { "epoch": 1.954328561690525, "grad_norm": 23.15069940763604, "learning_rate": 3.2686937886354624e-06, "loss": 2.9117, "step": 5734 }, { "epoch": 1.9546693933197, "grad_norm": 29.047363316481498, "learning_rate": 3.2668335234793537e-06, "loss": 2.3789, "step": 5735 }, { "epoch": 1.9550102249488752, "grad_norm": 20.478667771172468, "learning_rate": 3.2649735309572306e-06, "loss": 2.8673, "step": 5736 }, { "epoch": 1.9553510565780505, "grad_norm": 17.28018225536224, "learning_rate": 3.2631138113616767e-06, "loss": 2.1826, "step": 5737 }, { "epoch": 1.9556918882072256, "grad_norm": 19.452564874206686, "learning_rate": 3.2612543649852336e-06, "loss": 2.8529, "step": 5738 }, { "epoch": 1.9560327198364007, "grad_norm": 17.231369190378487, "learning_rate": 3.2593951921204024e-06, "loss": 2.1078, "step": 5739 }, { "epoch": 1.956373551465576, "grad_norm": 22.18192376291246, "learning_rate": 3.2575362930596366e-06, "loss": 1.5022, "step": 5740 }, { "epoch": 1.9567143830947513, "grad_norm": 14.863294710865148, "learning_rate": 3.255677668095346e-06, "loss": 2.4737, "step": 5741 }, { "epoch": 1.9570552147239264, "grad_norm": 23.72298850822961, "learning_rate": 3.2538193175199035e-06, "loss": 2.5175, "step": 5742 }, { "epoch": 1.9573960463531015, "grad_norm": 15.549167727169428, "learning_rate": 3.251961241625634e-06, "loss": 1.9486, "step": 5743 }, { "epoch": 1.9577368779822768, "grad_norm": 13.255547340052246, "learning_rate": 3.250103440704818e-06, "loss": 2.1866, "step": 5744 }, { "epoch": 1.9580777096114519, "grad_norm": 15.910939636000897, "learning_rate": 3.2482459150496987e-06, "loss": 1.9629, "step": 5745 }, { "epoch": 1.958418541240627, "grad_norm": 23.591396123758418, "learning_rate": 3.246388664952469e-06, "loss": 2.8744, "step": 5746 }, { "epoch": 1.9587593728698023, "grad_norm": 20.57820224651733, "learning_rate": 3.244531690705283e-06, "loss": 2.0024, "step": 5747 }, { "epoch": 1.9591002044989776, "grad_norm": 17.356657514093797, "learning_rate": 3.242674992600251e-06, "loss": 3.0643, "step": 5748 }, { "epoch": 1.9594410361281527, "grad_norm": 20.278768178567862, "learning_rate": 3.240818570929438e-06, "loss": 2.1787, "step": 5749 }, { "epoch": 1.9597818677573278, "grad_norm": 19.478223208399715, "learning_rate": 3.238962425984865e-06, "loss": 2.1826, "step": 5750 }, { "epoch": 1.960122699386503, "grad_norm": 18.930110855891815, "learning_rate": 3.237106558058514e-06, "loss": 2.8057, "step": 5751 }, { "epoch": 1.9604635310156784, "grad_norm": 14.910385830596676, "learning_rate": 3.2352509674423205e-06, "loss": 1.9556, "step": 5752 }, { "epoch": 1.9608043626448535, "grad_norm": 19.650333671978228, "learning_rate": 3.233395654428172e-06, "loss": 2.0414, "step": 5753 }, { "epoch": 1.9611451942740286, "grad_norm": 38.37434696548279, "learning_rate": 3.2315406193079223e-06, "loss": 2.2915, "step": 5754 }, { "epoch": 1.9614860259032039, "grad_norm": 20.107692601381476, "learning_rate": 3.229685862373371e-06, "loss": 1.7174, "step": 5755 }, { "epoch": 1.961826857532379, "grad_norm": 17.20811445061792, "learning_rate": 3.2278313839162807e-06, "loss": 1.6098, "step": 5756 }, { "epoch": 1.962167689161554, "grad_norm": 26.3328355223654, "learning_rate": 3.2259771842283698e-06, "loss": 2.3995, "step": 5757 }, { "epoch": 1.9625085207907293, "grad_norm": 23.20900071197618, "learning_rate": 3.2241232636013087e-06, "loss": 2.2084, "step": 5758 }, { "epoch": 1.9628493524199047, "grad_norm": 47.758013088167665, "learning_rate": 3.222269622326727e-06, "loss": 1.8517, "step": 5759 }, { "epoch": 1.9631901840490797, "grad_norm": 16.891889705662788, "learning_rate": 3.2204162606962126e-06, "loss": 2.0093, "step": 5760 }, { "epoch": 1.9635310156782548, "grad_norm": 19.50382205363358, "learning_rate": 3.218563179001304e-06, "loss": 1.7428, "step": 5761 }, { "epoch": 1.9638718473074301, "grad_norm": 15.217503932690668, "learning_rate": 3.2167103775334972e-06, "loss": 2.1608, "step": 5762 }, { "epoch": 1.9642126789366054, "grad_norm": 13.727200541448699, "learning_rate": 3.2148578565842477e-06, "loss": 1.7859, "step": 5763 }, { "epoch": 1.9645535105657805, "grad_norm": 19.495935906105302, "learning_rate": 3.2130056164449642e-06, "loss": 2.155, "step": 5764 }, { "epoch": 1.9648943421949556, "grad_norm": 25.76264450147051, "learning_rate": 3.211153657407009e-06, "loss": 2.7626, "step": 5765 }, { "epoch": 1.965235173824131, "grad_norm": 15.932377752946246, "learning_rate": 3.2093019797617075e-06, "loss": 2.0986, "step": 5766 }, { "epoch": 1.965576005453306, "grad_norm": 45.208065379023786, "learning_rate": 3.207450583800331e-06, "loss": 2.2657, "step": 5767 }, { "epoch": 1.965916837082481, "grad_norm": 12.646697955164278, "learning_rate": 3.2055994698141146e-06, "loss": 2.0711, "step": 5768 }, { "epoch": 1.9662576687116564, "grad_norm": 14.213550719421916, "learning_rate": 3.203748638094242e-06, "loss": 2.4753, "step": 5769 }, { "epoch": 1.9665985003408317, "grad_norm": 22.064323664477183, "learning_rate": 3.2018980889318617e-06, "loss": 2.6925, "step": 5770 }, { "epoch": 1.9669393319700068, "grad_norm": 17.002245809385066, "learning_rate": 3.2000478226180693e-06, "loss": 1.7021, "step": 5771 }, { "epoch": 1.967280163599182, "grad_norm": 19.365027048915234, "learning_rate": 3.1981978394439186e-06, "loss": 2.1416, "step": 5772 }, { "epoch": 1.9676209952283572, "grad_norm": 16.78142851571679, "learning_rate": 3.1963481397004213e-06, "loss": 1.9589, "step": 5773 }, { "epoch": 1.9679618268575325, "grad_norm": 24.083165567436765, "learning_rate": 3.194498723678542e-06, "loss": 2.2409, "step": 5774 }, { "epoch": 1.9683026584867076, "grad_norm": 19.090730983097068, "learning_rate": 3.192649591669199e-06, "loss": 2.2722, "step": 5775 }, { "epoch": 1.9686434901158827, "grad_norm": 16.225279861519432, "learning_rate": 3.190800743963271e-06, "loss": 2.072, "step": 5776 }, { "epoch": 1.968984321745058, "grad_norm": 25.66007861051097, "learning_rate": 3.1889521808515888e-06, "loss": 2.0987, "step": 5777 }, { "epoch": 1.969325153374233, "grad_norm": 17.48914165331983, "learning_rate": 3.1871039026249357e-06, "loss": 2.6635, "step": 5778 }, { "epoch": 1.9696659850034082, "grad_norm": 15.066081004326822, "learning_rate": 3.185255909574058e-06, "loss": 2.5514, "step": 5779 }, { "epoch": 1.9700068166325835, "grad_norm": 21.175162333850228, "learning_rate": 3.183408201989648e-06, "loss": 1.9878, "step": 5780 }, { "epoch": 1.9703476482617588, "grad_norm": 14.054304146524911, "learning_rate": 3.1815607801623586e-06, "loss": 1.9365, "step": 5781 }, { "epoch": 1.9706884798909339, "grad_norm": 13.956719905404505, "learning_rate": 3.1797136443827994e-06, "loss": 2.2665, "step": 5782 }, { "epoch": 1.971029311520109, "grad_norm": 16.681645835655956, "learning_rate": 3.177866794941529e-06, "loss": 2.6672, "step": 5783 }, { "epoch": 1.9713701431492843, "grad_norm": 24.69453025659232, "learning_rate": 3.1760202321290657e-06, "loss": 1.1089, "step": 5784 }, { "epoch": 1.9717109747784596, "grad_norm": 19.061346066897997, "learning_rate": 3.1741739562358796e-06, "loss": 1.6685, "step": 5785 }, { "epoch": 1.9720518064076347, "grad_norm": 35.328165613819905, "learning_rate": 3.1723279675524006e-06, "loss": 2.4507, "step": 5786 }, { "epoch": 1.9723926380368098, "grad_norm": 24.193151889118255, "learning_rate": 3.170482266369005e-06, "loss": 1.7514, "step": 5787 }, { "epoch": 1.972733469665985, "grad_norm": 21.51140874434846, "learning_rate": 3.168636852976033e-06, "loss": 2.5879, "step": 5788 }, { "epoch": 1.9730743012951601, "grad_norm": 14.423748220129152, "learning_rate": 3.1667917276637753e-06, "loss": 2.3045, "step": 5789 }, { "epoch": 1.9734151329243352, "grad_norm": 21.45228877039174, "learning_rate": 3.1649468907224747e-06, "loss": 2.2022, "step": 5790 }, { "epoch": 1.9737559645535105, "grad_norm": 18.23047001484623, "learning_rate": 3.1631023424423347e-06, "loss": 2.8967, "step": 5791 }, { "epoch": 1.9740967961826859, "grad_norm": 26.14538127019853, "learning_rate": 3.1612580831135077e-06, "loss": 2.4539, "step": 5792 }, { "epoch": 1.974437627811861, "grad_norm": 19.091662792793464, "learning_rate": 3.159414113026103e-06, "loss": 2.2058, "step": 5793 }, { "epoch": 1.974778459441036, "grad_norm": 21.78570735284556, "learning_rate": 3.157570432470187e-06, "loss": 2.0414, "step": 5794 }, { "epoch": 1.9751192910702113, "grad_norm": 14.323647195712317, "learning_rate": 3.1557270417357767e-06, "loss": 2.1919, "step": 5795 }, { "epoch": 1.9754601226993866, "grad_norm": 16.821743304090177, "learning_rate": 3.153883941112842e-06, "loss": 2.4257, "step": 5796 }, { "epoch": 1.9758009543285617, "grad_norm": 18.597380136317422, "learning_rate": 3.1520411308913137e-06, "loss": 2.3544, "step": 5797 }, { "epoch": 1.9761417859577368, "grad_norm": 15.956738655964285, "learning_rate": 3.1501986113610716e-06, "loss": 2.0469, "step": 5798 }, { "epoch": 1.9764826175869121, "grad_norm": 10.51696138390365, "learning_rate": 3.1483563828119507e-06, "loss": 1.7813, "step": 5799 }, { "epoch": 1.9768234492160872, "grad_norm": 17.064759964947072, "learning_rate": 3.146514445533744e-06, "loss": 1.6749, "step": 5800 }, { "epoch": 1.9771642808452623, "grad_norm": 15.866098263017482, "learning_rate": 3.1446727998161915e-06, "loss": 2.5238, "step": 5801 }, { "epoch": 1.9775051124744376, "grad_norm": 16.21090472417081, "learning_rate": 3.142831445948994e-06, "loss": 2.7831, "step": 5802 }, { "epoch": 1.977845944103613, "grad_norm": 24.384660582288838, "learning_rate": 3.1409903842218014e-06, "loss": 2.4727, "step": 5803 }, { "epoch": 1.978186775732788, "grad_norm": 13.76026811050494, "learning_rate": 3.139149614924224e-06, "loss": 1.9295, "step": 5804 }, { "epoch": 1.978527607361963, "grad_norm": 18.965873107068063, "learning_rate": 3.137309138345819e-06, "loss": 1.9463, "step": 5805 }, { "epoch": 1.9788684389911384, "grad_norm": 16.251860260822273, "learning_rate": 3.1354689547760996e-06, "loss": 2.3171, "step": 5806 }, { "epoch": 1.9792092706203137, "grad_norm": 15.799053116682103, "learning_rate": 3.1336290645045385e-06, "loss": 1.7163, "step": 5807 }, { "epoch": 1.9795501022494888, "grad_norm": 13.37514858381636, "learning_rate": 3.131789467820555e-06, "loss": 2.1945, "step": 5808 }, { "epoch": 1.9798909338786639, "grad_norm": 17.999948210177433, "learning_rate": 3.1299501650135234e-06, "loss": 2.0215, "step": 5809 }, { "epoch": 1.9802317655078392, "grad_norm": 15.009488941506776, "learning_rate": 3.128111156372775e-06, "loss": 2.5378, "step": 5810 }, { "epoch": 1.9805725971370143, "grad_norm": 17.680059808737486, "learning_rate": 3.1262724421875956e-06, "loss": 2.2799, "step": 5811 }, { "epoch": 1.9809134287661894, "grad_norm": 16.22457885747555, "learning_rate": 3.124434022747217e-06, "loss": 2.2319, "step": 5812 }, { "epoch": 1.9812542603953647, "grad_norm": 15.83436911111804, "learning_rate": 3.1225958983408356e-06, "loss": 2.2219, "step": 5813 }, { "epoch": 1.98159509202454, "grad_norm": 15.018952785090322, "learning_rate": 3.1207580692575913e-06, "loss": 1.6091, "step": 5814 }, { "epoch": 1.981935923653715, "grad_norm": 15.15154657744068, "learning_rate": 3.1189205357865827e-06, "loss": 2.4045, "step": 5815 }, { "epoch": 1.9822767552828902, "grad_norm": 19.66513835846893, "learning_rate": 3.117083298216864e-06, "loss": 2.3999, "step": 5816 }, { "epoch": 1.9826175869120655, "grad_norm": 16.399741730416256, "learning_rate": 3.1152463568374368e-06, "loss": 2.5954, "step": 5817 }, { "epoch": 1.9829584185412408, "grad_norm": 16.899856732932477, "learning_rate": 3.1134097119372596e-06, "loss": 1.7977, "step": 5818 }, { "epoch": 1.9832992501704159, "grad_norm": 17.279680797781236, "learning_rate": 3.111573363805247e-06, "loss": 1.8444, "step": 5819 }, { "epoch": 1.983640081799591, "grad_norm": 24.82500988333673, "learning_rate": 3.109737312730261e-06, "loss": 1.8459, "step": 5820 }, { "epoch": 1.9839809134287663, "grad_norm": 21.313812559187685, "learning_rate": 3.107901559001119e-06, "loss": 2.409, "step": 5821 }, { "epoch": 1.9843217450579413, "grad_norm": 14.844109593475453, "learning_rate": 3.106066102906595e-06, "loss": 2.2521, "step": 5822 }, { "epoch": 1.9846625766871164, "grad_norm": 14.410854297147807, "learning_rate": 3.1042309447354125e-06, "loss": 2.412, "step": 5823 }, { "epoch": 1.9850034083162917, "grad_norm": 26.98131284906122, "learning_rate": 3.1023960847762476e-06, "loss": 2.7353, "step": 5824 }, { "epoch": 1.985344239945467, "grad_norm": 14.708311345567425, "learning_rate": 3.1005615233177342e-06, "loss": 2.472, "step": 5825 }, { "epoch": 1.9856850715746421, "grad_norm": 14.235317357297905, "learning_rate": 3.0987272606484536e-06, "loss": 2.2242, "step": 5826 }, { "epoch": 1.9860259032038172, "grad_norm": 15.618919417159342, "learning_rate": 3.096893297056942e-06, "loss": 2.1691, "step": 5827 }, { "epoch": 1.9863667348329925, "grad_norm": 24.31916766371571, "learning_rate": 3.0950596328316935e-06, "loss": 1.9603, "step": 5828 }, { "epoch": 1.9867075664621678, "grad_norm": 14.709506909539577, "learning_rate": 3.0932262682611473e-06, "loss": 2.0587, "step": 5829 }, { "epoch": 1.987048398091343, "grad_norm": 17.242443955886017, "learning_rate": 3.091393203633698e-06, "loss": 1.6641, "step": 5830 }, { "epoch": 1.987389229720518, "grad_norm": 13.971075838564936, "learning_rate": 3.0895604392376954e-06, "loss": 2.3534, "step": 5831 }, { "epoch": 1.9877300613496933, "grad_norm": 12.181546523214486, "learning_rate": 3.087727975361443e-06, "loss": 1.967, "step": 5832 }, { "epoch": 1.9880708929788684, "grad_norm": 17.44368513762168, "learning_rate": 3.0858958122931916e-06, "loss": 2.3094, "step": 5833 }, { "epoch": 1.9884117246080435, "grad_norm": 18.62775781995759, "learning_rate": 3.0840639503211467e-06, "loss": 1.6119, "step": 5834 }, { "epoch": 1.9887525562372188, "grad_norm": 18.804637308630152, "learning_rate": 3.0822323897334704e-06, "loss": 2.0878, "step": 5835 }, { "epoch": 1.9890933878663941, "grad_norm": 19.6721170105978, "learning_rate": 3.0804011308182747e-06, "loss": 2.7169, "step": 5836 }, { "epoch": 1.9894342194955692, "grad_norm": 15.30578367471209, "learning_rate": 3.0785701738636197e-06, "loss": 2.2699, "step": 5837 }, { "epoch": 1.9897750511247443, "grad_norm": 18.396020442070885, "learning_rate": 3.076739519157528e-06, "loss": 1.6899, "step": 5838 }, { "epoch": 1.9901158827539196, "grad_norm": 16.63777053662709, "learning_rate": 3.0749091669879644e-06, "loss": 2.1342, "step": 5839 }, { "epoch": 1.990456714383095, "grad_norm": 17.145648256250215, "learning_rate": 3.0730791176428516e-06, "loss": 2.3421, "step": 5840 }, { "epoch": 1.99079754601227, "grad_norm": 21.136408993985373, "learning_rate": 3.071249371410066e-06, "loss": 2.2683, "step": 5841 }, { "epoch": 1.991138377641445, "grad_norm": 17.099539638031175, "learning_rate": 3.0694199285774313e-06, "loss": 2.2619, "step": 5842 }, { "epoch": 1.9914792092706204, "grad_norm": 24.386718217616597, "learning_rate": 3.067590789432727e-06, "loss": 1.9913, "step": 5843 }, { "epoch": 1.9918200408997955, "grad_norm": 15.223298445531094, "learning_rate": 3.065761954263684e-06, "loss": 2.2276, "step": 5844 }, { "epoch": 1.9921608725289706, "grad_norm": 17.170749936090306, "learning_rate": 3.0639334233579864e-06, "loss": 2.7601, "step": 5845 }, { "epoch": 1.9925017041581459, "grad_norm": 14.674428019745864, "learning_rate": 3.062105197003266e-06, "loss": 1.9504, "step": 5846 }, { "epoch": 1.9928425357873212, "grad_norm": 299.5254054958975, "learning_rate": 3.0602772754871134e-06, "loss": 1.7415, "step": 5847 }, { "epoch": 1.9931833674164963, "grad_norm": 16.213922038071285, "learning_rate": 3.0584496590970688e-06, "loss": 2.1123, "step": 5848 }, { "epoch": 1.9935241990456714, "grad_norm": 19.651975195401057, "learning_rate": 3.056622348120619e-06, "loss": 2.012, "step": 5849 }, { "epoch": 1.9938650306748467, "grad_norm": 20.208402353447077, "learning_rate": 3.054795342845212e-06, "loss": 1.7364, "step": 5850 }, { "epoch": 1.994205862304022, "grad_norm": 15.57343562293611, "learning_rate": 3.05296864355824e-06, "loss": 1.8256, "step": 5851 }, { "epoch": 1.9945466939331968, "grad_norm": 11.566271405367008, "learning_rate": 3.05114225054705e-06, "loss": 1.6349, "step": 5852 }, { "epoch": 1.9948875255623721, "grad_norm": 10.82615789459265, "learning_rate": 3.0493161640989444e-06, "loss": 2.0746, "step": 5853 }, { "epoch": 1.9952283571915475, "grad_norm": 13.99105413937312, "learning_rate": 3.047490384501171e-06, "loss": 2.3712, "step": 5854 }, { "epoch": 1.9955691888207225, "grad_norm": 12.074639327956755, "learning_rate": 3.045664912040931e-06, "loss": 2.102, "step": 5855 }, { "epoch": 1.9959100204498976, "grad_norm": 13.502177407742225, "learning_rate": 3.0438397470053806e-06, "loss": 1.9314, "step": 5856 }, { "epoch": 1.996250852079073, "grad_norm": 14.913298836632139, "learning_rate": 3.042014889681626e-06, "loss": 1.8047, "step": 5857 }, { "epoch": 1.9965916837082482, "grad_norm": 23.494732440905896, "learning_rate": 3.0401903403567222e-06, "loss": 2.8226, "step": 5858 }, { "epoch": 1.9969325153374233, "grad_norm": 12.015646814666866, "learning_rate": 3.038366099317681e-06, "loss": 1.396, "step": 5859 }, { "epoch": 1.9972733469665984, "grad_norm": 11.914490713108812, "learning_rate": 3.03654216685146e-06, "loss": 1.9757, "step": 5860 }, { "epoch": 1.9976141785957737, "grad_norm": 17.56173106992764, "learning_rate": 3.034718543244971e-06, "loss": 2.7571, "step": 5861 }, { "epoch": 1.997955010224949, "grad_norm": 13.703676360891958, "learning_rate": 3.0328952287850812e-06, "loss": 2.3831, "step": 5862 }, { "epoch": 1.998295841854124, "grad_norm": 17.36940272449991, "learning_rate": 3.031072223758602e-06, "loss": 2.4012, "step": 5863 }, { "epoch": 1.9986366734832992, "grad_norm": 15.15020348264035, "learning_rate": 3.029249528452298e-06, "loss": 1.6634, "step": 5864 }, { "epoch": 1.9989775051124745, "grad_norm": 21.78173159763287, "learning_rate": 3.0274271431528894e-06, "loss": 2.6104, "step": 5865 }, { "epoch": 1.9993183367416496, "grad_norm": 13.361806938458646, "learning_rate": 3.0256050681470446e-06, "loss": 1.9403, "step": 5866 }, { "epoch": 1.9996591683708247, "grad_norm": 19.34090170163813, "learning_rate": 3.0237833037213822e-06, "loss": 2.1582, "step": 5867 }, { "epoch": 2.0, "grad_norm": 16.4305595079954, "learning_rate": 3.02196185016247e-06, "loss": 1.3243, "step": 5868 }, { "epoch": 2.0003408316291753, "grad_norm": 17.49196182027437, "learning_rate": 3.020140707756836e-06, "loss": 1.5072, "step": 5869 }, { "epoch": 2.00068166325835, "grad_norm": 17.594830753949804, "learning_rate": 3.0183198767909493e-06, "loss": 1.4202, "step": 5870 }, { "epoch": 2.0010224948875255, "grad_norm": 7.653830446786605, "learning_rate": 3.0164993575512325e-06, "loss": 1.041, "step": 5871 }, { "epoch": 2.001363326516701, "grad_norm": 14.633944500922489, "learning_rate": 3.0146791503240656e-06, "loss": 1.5661, "step": 5872 }, { "epoch": 2.001704158145876, "grad_norm": 12.17944149309846, "learning_rate": 3.0128592553957703e-06, "loss": 1.8535, "step": 5873 }, { "epoch": 2.002044989775051, "grad_norm": 9.731941893972284, "learning_rate": 3.0110396730526226e-06, "loss": 1.7515, "step": 5874 }, { "epoch": 2.0023858214042263, "grad_norm": 10.412821352963755, "learning_rate": 3.009220403580855e-06, "loss": 1.4629, "step": 5875 }, { "epoch": 2.0027266530334016, "grad_norm": 22.6584449548545, "learning_rate": 3.0074014472666414e-06, "loss": 1.6104, "step": 5876 }, { "epoch": 2.003067484662577, "grad_norm": 18.016223455123445, "learning_rate": 3.005582804396111e-06, "loss": 1.4938, "step": 5877 }, { "epoch": 2.0034083162917518, "grad_norm": 12.732893865210198, "learning_rate": 3.0037644752553465e-06, "loss": 1.4687, "step": 5878 }, { "epoch": 2.003749147920927, "grad_norm": 10.442586534086297, "learning_rate": 3.001946460130377e-06, "loss": 1.5199, "step": 5879 }, { "epoch": 2.0040899795501024, "grad_norm": 29.834247509779253, "learning_rate": 3.0001287593071817e-06, "loss": 1.9859, "step": 5880 }, { "epoch": 2.0044308111792772, "grad_norm": 12.60843466704873, "learning_rate": 2.9983113730716935e-06, "loss": 1.6611, "step": 5881 }, { "epoch": 2.0047716428084525, "grad_norm": 14.701526469610043, "learning_rate": 2.996494301709796e-06, "loss": 1.2581, "step": 5882 }, { "epoch": 2.005112474437628, "grad_norm": 15.432775629222135, "learning_rate": 2.9946775455073186e-06, "loss": 1.8277, "step": 5883 }, { "epoch": 2.005453306066803, "grad_norm": 13.35674912828684, "learning_rate": 2.992861104750048e-06, "loss": 1.4428, "step": 5884 }, { "epoch": 2.005794137695978, "grad_norm": 12.668237259288661, "learning_rate": 2.991044979723714e-06, "loss": 1.4662, "step": 5885 }, { "epoch": 2.0061349693251533, "grad_norm": 19.262451123107567, "learning_rate": 2.9892291707140008e-06, "loss": 1.1924, "step": 5886 }, { "epoch": 2.0064758009543286, "grad_norm": 10.048104753670048, "learning_rate": 2.9874136780065465e-06, "loss": 1.3086, "step": 5887 }, { "epoch": 2.006816632583504, "grad_norm": 12.686965039912572, "learning_rate": 2.9855985018869314e-06, "loss": 1.3283, "step": 5888 }, { "epoch": 2.007157464212679, "grad_norm": 19.83647701817355, "learning_rate": 2.9837836426406892e-06, "loss": 1.2719, "step": 5889 }, { "epoch": 2.007498295841854, "grad_norm": 14.359877304684153, "learning_rate": 2.981969100553307e-06, "loss": 1.8391, "step": 5890 }, { "epoch": 2.0078391274710294, "grad_norm": 6.806612880084929, "learning_rate": 2.9801548759102197e-06, "loss": 0.8837, "step": 5891 }, { "epoch": 2.0081799591002043, "grad_norm": 17.884175152993528, "learning_rate": 2.978340968996809e-06, "loss": 1.8828, "step": 5892 }, { "epoch": 2.0085207907293796, "grad_norm": 13.949738768760142, "learning_rate": 2.9765273800984134e-06, "loss": 1.8214, "step": 5893 }, { "epoch": 2.008861622358555, "grad_norm": 17.23643291011136, "learning_rate": 2.974714109500315e-06, "loss": 1.8594, "step": 5894 }, { "epoch": 2.0092024539877302, "grad_norm": 15.877157448978377, "learning_rate": 2.9729011574877485e-06, "loss": 1.3273, "step": 5895 }, { "epoch": 2.009543285616905, "grad_norm": 12.48661549927127, "learning_rate": 2.971088524345901e-06, "loss": 1.2264, "step": 5896 }, { "epoch": 2.0098841172460804, "grad_norm": 14.897193380190433, "learning_rate": 2.969276210359906e-06, "loss": 1.893, "step": 5897 }, { "epoch": 2.0102249488752557, "grad_norm": 16.094292887869155, "learning_rate": 2.9674642158148447e-06, "loss": 2.1792, "step": 5898 }, { "epoch": 2.010565780504431, "grad_norm": 14.468080684023356, "learning_rate": 2.9656525409957538e-06, "loss": 1.5277, "step": 5899 }, { "epoch": 2.010906612133606, "grad_norm": 11.832464069433618, "learning_rate": 2.963841186187617e-06, "loss": 1.4943, "step": 5900 }, { "epoch": 2.011247443762781, "grad_norm": 14.518967868635778, "learning_rate": 2.962030151675367e-06, "loss": 2.0398, "step": 5901 }, { "epoch": 2.0115882753919565, "grad_norm": 11.955859998794185, "learning_rate": 2.9602194377438865e-06, "loss": 1.3329, "step": 5902 }, { "epoch": 2.0119291070211314, "grad_norm": 13.844855042666769, "learning_rate": 2.958409044678009e-06, "loss": 1.689, "step": 5903 }, { "epoch": 2.0122699386503067, "grad_norm": 16.836697715489677, "learning_rate": 2.956598972762516e-06, "loss": 1.1664, "step": 5904 }, { "epoch": 2.012610770279482, "grad_norm": 16.239951306986672, "learning_rate": 2.954789222282137e-06, "loss": 1.7287, "step": 5905 }, { "epoch": 2.0129516019086573, "grad_norm": 11.317480696703344, "learning_rate": 2.9529797935215565e-06, "loss": 1.4855, "step": 5906 }, { "epoch": 2.013292433537832, "grad_norm": 10.697092465237922, "learning_rate": 2.951170686765404e-06, "loss": 1.1902, "step": 5907 }, { "epoch": 2.0136332651670075, "grad_norm": 12.341907337210694, "learning_rate": 2.9493619022982557e-06, "loss": 1.529, "step": 5908 }, { "epoch": 2.0139740967961828, "grad_norm": 16.991995512288433, "learning_rate": 2.947553440404646e-06, "loss": 1.509, "step": 5909 }, { "epoch": 2.014314928425358, "grad_norm": 14.126546520587022, "learning_rate": 2.9457453013690496e-06, "loss": 0.7812, "step": 5910 }, { "epoch": 2.014655760054533, "grad_norm": 12.494546011310195, "learning_rate": 2.943937485475893e-06, "loss": 1.8191, "step": 5911 }, { "epoch": 2.0149965916837083, "grad_norm": 13.087342121501397, "learning_rate": 2.942129993009557e-06, "loss": 1.2004, "step": 5912 }, { "epoch": 2.0153374233128836, "grad_norm": 16.939804567243574, "learning_rate": 2.940322824254365e-06, "loss": 1.6693, "step": 5913 }, { "epoch": 2.0156782549420584, "grad_norm": 14.071113490960164, "learning_rate": 2.93851597949459e-06, "loss": 1.736, "step": 5914 }, { "epoch": 2.0160190865712337, "grad_norm": 15.233768683507082, "learning_rate": 2.9367094590144583e-06, "loss": 1.5352, "step": 5915 }, { "epoch": 2.016359918200409, "grad_norm": 18.837344593481212, "learning_rate": 2.9349032630981437e-06, "loss": 1.2867, "step": 5916 }, { "epoch": 2.0167007498295844, "grad_norm": 10.933406332923548, "learning_rate": 2.9330973920297645e-06, "loss": 1.4677, "step": 5917 }, { "epoch": 2.0170415814587592, "grad_norm": 12.966559877823043, "learning_rate": 2.931291846093395e-06, "loss": 1.313, "step": 5918 }, { "epoch": 2.0173824130879345, "grad_norm": 16.611414141344532, "learning_rate": 2.9294866255730527e-06, "loss": 1.2212, "step": 5919 }, { "epoch": 2.01772324471711, "grad_norm": 14.427731206730764, "learning_rate": 2.927681730752705e-06, "loss": 1.7153, "step": 5920 }, { "epoch": 2.018064076346285, "grad_norm": 11.780427128599952, "learning_rate": 2.925877161916273e-06, "loss": 1.5987, "step": 5921 }, { "epoch": 2.01840490797546, "grad_norm": 10.725819779057913, "learning_rate": 2.924072919347619e-06, "loss": 1.1893, "step": 5922 }, { "epoch": 2.0187457396046353, "grad_norm": 17.579116712848737, "learning_rate": 2.9222690033305574e-06, "loss": 1.1106, "step": 5923 }, { "epoch": 2.0190865712338106, "grad_norm": 16.679341409894658, "learning_rate": 2.9204654141488533e-06, "loss": 1.8806, "step": 5924 }, { "epoch": 2.0194274028629855, "grad_norm": 15.64063676280925, "learning_rate": 2.918662152086219e-06, "loss": 1.5929, "step": 5925 }, { "epoch": 2.019768234492161, "grad_norm": 13.98751452273023, "learning_rate": 2.91685921742631e-06, "loss": 1.9522, "step": 5926 }, { "epoch": 2.020109066121336, "grad_norm": 16.194339241862274, "learning_rate": 2.9150566104527427e-06, "loss": 1.3364, "step": 5927 }, { "epoch": 2.0204498977505114, "grad_norm": 13.775130828961522, "learning_rate": 2.9132543314490687e-06, "loss": 1.3988, "step": 5928 }, { "epoch": 2.0207907293796863, "grad_norm": 17.388030460466943, "learning_rate": 2.911452380698795e-06, "loss": 1.5626, "step": 5929 }, { "epoch": 2.0211315610088616, "grad_norm": 9.410181940610926, "learning_rate": 2.909650758485375e-06, "loss": 1.2159, "step": 5930 }, { "epoch": 2.021472392638037, "grad_norm": 12.46500719942039, "learning_rate": 2.9078494650922128e-06, "loss": 1.1145, "step": 5931 }, { "epoch": 2.021813224267212, "grad_norm": 18.106416333582533, "learning_rate": 2.9060485008026602e-06, "loss": 1.639, "step": 5932 }, { "epoch": 2.022154055896387, "grad_norm": 14.883923441402564, "learning_rate": 2.9042478659000093e-06, "loss": 2.0705, "step": 5933 }, { "epoch": 2.0224948875255624, "grad_norm": 13.446974329878971, "learning_rate": 2.9024475606675146e-06, "loss": 1.5106, "step": 5934 }, { "epoch": 2.0228357191547377, "grad_norm": 12.356651135702604, "learning_rate": 2.9006475853883696e-06, "loss": 1.491, "step": 5935 }, { "epoch": 2.0231765507839126, "grad_norm": 19.501011149278824, "learning_rate": 2.898847940345714e-06, "loss": 0.8964, "step": 5936 }, { "epoch": 2.023517382413088, "grad_norm": 16.124298541506512, "learning_rate": 2.897048625822644e-06, "loss": 1.7542, "step": 5937 }, { "epoch": 2.023858214042263, "grad_norm": 14.581607476617053, "learning_rate": 2.8952496421021946e-06, "loss": 1.2581, "step": 5938 }, { "epoch": 2.0241990456714385, "grad_norm": 14.913194602814318, "learning_rate": 2.893450989467354e-06, "loss": 1.7744, "step": 5939 }, { "epoch": 2.0245398773006134, "grad_norm": 16.75667877400706, "learning_rate": 2.8916526682010615e-06, "loss": 1.8273, "step": 5940 }, { "epoch": 2.0248807089297887, "grad_norm": 17.19260112750406, "learning_rate": 2.8898546785861957e-06, "loss": 1.1158, "step": 5941 }, { "epoch": 2.025221540558964, "grad_norm": 10.341508391087194, "learning_rate": 2.888057020905589e-06, "loss": 1.1803, "step": 5942 }, { "epoch": 2.025562372188139, "grad_norm": 26.34735470889903, "learning_rate": 2.8862596954420197e-06, "loss": 1.4077, "step": 5943 }, { "epoch": 2.025903203817314, "grad_norm": 40.38309636472682, "learning_rate": 2.884462702478215e-06, "loss": 1.4507, "step": 5944 }, { "epoch": 2.0262440354464895, "grad_norm": 17.200781458070555, "learning_rate": 2.882666042296849e-06, "loss": 1.4572, "step": 5945 }, { "epoch": 2.0265848670756648, "grad_norm": 13.659233618427017, "learning_rate": 2.880869715180542e-06, "loss": 1.6364, "step": 5946 }, { "epoch": 2.0269256987048396, "grad_norm": 12.586535167293086, "learning_rate": 2.879073721411865e-06, "loss": 1.7081, "step": 5947 }, { "epoch": 2.027266530334015, "grad_norm": 17.169462281367654, "learning_rate": 2.8772780612733353e-06, "loss": 1.8755, "step": 5948 }, { "epoch": 2.0276073619631902, "grad_norm": 16.130843722745627, "learning_rate": 2.8754827350474156e-06, "loss": 1.4468, "step": 5949 }, { "epoch": 2.0279481935923656, "grad_norm": 18.72158190175183, "learning_rate": 2.87368774301652e-06, "loss": 1.2191, "step": 5950 }, { "epoch": 2.0282890252215404, "grad_norm": 15.240119058873647, "learning_rate": 2.8718930854630058e-06, "loss": 1.361, "step": 5951 }, { "epoch": 2.0286298568507157, "grad_norm": 12.207534379252968, "learning_rate": 2.8700987626691812e-06, "loss": 1.4238, "step": 5952 }, { "epoch": 2.028970688479891, "grad_norm": 14.301160495922447, "learning_rate": 2.8683047749173023e-06, "loss": 1.4914, "step": 5953 }, { "epoch": 2.029311520109066, "grad_norm": 13.687410880607867, "learning_rate": 2.866511122489563e-06, "loss": 1.691, "step": 5954 }, { "epoch": 2.029652351738241, "grad_norm": 13.130999925203884, "learning_rate": 2.8647178056681197e-06, "loss": 1.8387, "step": 5955 }, { "epoch": 2.0299931833674165, "grad_norm": 21.07102286131651, "learning_rate": 2.862924824735068e-06, "loss": 2.1856, "step": 5956 }, { "epoch": 2.030334014996592, "grad_norm": 16.881189544885952, "learning_rate": 2.8611321799724433e-06, "loss": 1.571, "step": 5957 }, { "epoch": 2.0306748466257667, "grad_norm": 19.58728391415735, "learning_rate": 2.8593398716622466e-06, "loss": 1.8569, "step": 5958 }, { "epoch": 2.031015678254942, "grad_norm": 13.61058801036104, "learning_rate": 2.857547900086407e-06, "loss": 1.5459, "step": 5959 }, { "epoch": 2.0313565098841173, "grad_norm": 16.52674447606881, "learning_rate": 2.855756265526809e-06, "loss": 1.427, "step": 5960 }, { "epoch": 2.0316973415132926, "grad_norm": 11.579943498307614, "learning_rate": 2.8539649682652904e-06, "loss": 1.142, "step": 5961 }, { "epoch": 2.0320381731424675, "grad_norm": 10.561175014372564, "learning_rate": 2.8521740085836224e-06, "loss": 1.3507, "step": 5962 }, { "epoch": 2.032379004771643, "grad_norm": 14.39611247094284, "learning_rate": 2.8503833867635345e-06, "loss": 1.5365, "step": 5963 }, { "epoch": 2.032719836400818, "grad_norm": 9.703233820855386, "learning_rate": 2.8485931030866953e-06, "loss": 0.7759, "step": 5964 }, { "epoch": 2.033060668029993, "grad_norm": 15.641864156901267, "learning_rate": 2.8468031578347267e-06, "loss": 1.6484, "step": 5965 }, { "epoch": 2.0334014996591683, "grad_norm": 13.806622697665441, "learning_rate": 2.8450135512891936e-06, "loss": 1.2991, "step": 5966 }, { "epoch": 2.0337423312883436, "grad_norm": 15.159099623616427, "learning_rate": 2.8432242837316038e-06, "loss": 1.5164, "step": 5967 }, { "epoch": 2.034083162917519, "grad_norm": 15.119439544232968, "learning_rate": 2.8414353554434214e-06, "loss": 1.3432, "step": 5968 }, { "epoch": 2.0344239945466938, "grad_norm": 16.740425728177552, "learning_rate": 2.8396467667060535e-06, "loss": 1.2969, "step": 5969 }, { "epoch": 2.034764826175869, "grad_norm": 12.39401512970434, "learning_rate": 2.8378585178008444e-06, "loss": 1.2345, "step": 5970 }, { "epoch": 2.0351056578050444, "grad_norm": 16.3111573980388, "learning_rate": 2.836070609009101e-06, "loss": 1.3837, "step": 5971 }, { "epoch": 2.0354464894342197, "grad_norm": 18.229864512095045, "learning_rate": 2.8342830406120635e-06, "loss": 0.8862, "step": 5972 }, { "epoch": 2.0357873210633946, "grad_norm": 10.036677475721365, "learning_rate": 2.832495812890923e-06, "loss": 1.2354, "step": 5973 }, { "epoch": 2.03612815269257, "grad_norm": 13.136869592550674, "learning_rate": 2.8307089261268227e-06, "loss": 1.3118, "step": 5974 }, { "epoch": 2.036468984321745, "grad_norm": 18.686387038903497, "learning_rate": 2.8289223806008424e-06, "loss": 2.0248, "step": 5975 }, { "epoch": 2.03680981595092, "grad_norm": 13.215247090346043, "learning_rate": 2.827136176594014e-06, "loss": 1.2512, "step": 5976 }, { "epoch": 2.0371506475800953, "grad_norm": 14.079692825992746, "learning_rate": 2.825350314387315e-06, "loss": 1.0198, "step": 5977 }, { "epoch": 2.0374914792092707, "grad_norm": 14.87375738348336, "learning_rate": 2.823564794261668e-06, "loss": 1.0766, "step": 5978 }, { "epoch": 2.037832310838446, "grad_norm": 18.551690033997392, "learning_rate": 2.8217796164979427e-06, "loss": 1.2613, "step": 5979 }, { "epoch": 2.038173142467621, "grad_norm": 12.258252728223658, "learning_rate": 2.8199947813769548e-06, "loss": 1.1262, "step": 5980 }, { "epoch": 2.038513974096796, "grad_norm": 16.314821409772044, "learning_rate": 2.818210289179466e-06, "loss": 2.0165, "step": 5981 }, { "epoch": 2.0388548057259714, "grad_norm": 16.700988755700127, "learning_rate": 2.8164261401861836e-06, "loss": 1.4338, "step": 5982 }, { "epoch": 2.0391956373551468, "grad_norm": 15.256187063023779, "learning_rate": 2.8146423346777616e-06, "loss": 1.2842, "step": 5983 }, { "epoch": 2.0395364689843216, "grad_norm": 36.378421910513715, "learning_rate": 2.8128588729348005e-06, "loss": 1.5668, "step": 5984 }, { "epoch": 2.039877300613497, "grad_norm": 15.710338976545621, "learning_rate": 2.811075755237844e-06, "loss": 1.3214, "step": 5985 }, { "epoch": 2.0402181322426722, "grad_norm": 16.6442838158532, "learning_rate": 2.8092929818673865e-06, "loss": 2.0606, "step": 5986 }, { "epoch": 2.040558963871847, "grad_norm": 12.517315078577818, "learning_rate": 2.8075105531038643e-06, "loss": 1.3318, "step": 5987 }, { "epoch": 2.0408997955010224, "grad_norm": 23.570743962372884, "learning_rate": 2.8057284692276565e-06, "loss": 1.8938, "step": 5988 }, { "epoch": 2.0412406271301977, "grad_norm": 17.860622034729275, "learning_rate": 2.8039467305190972e-06, "loss": 1.0906, "step": 5989 }, { "epoch": 2.041581458759373, "grad_norm": 17.103647678434807, "learning_rate": 2.802165337258461e-06, "loss": 1.3997, "step": 5990 }, { "epoch": 2.041922290388548, "grad_norm": 19.44809455620341, "learning_rate": 2.8003842897259627e-06, "loss": 1.2619, "step": 5991 }, { "epoch": 2.042263122017723, "grad_norm": 10.890480835384933, "learning_rate": 2.798603588201775e-06, "loss": 1.5004, "step": 5992 }, { "epoch": 2.0426039536468985, "grad_norm": 14.487906431303864, "learning_rate": 2.796823232966005e-06, "loss": 1.1797, "step": 5993 }, { "epoch": 2.042944785276074, "grad_norm": 12.506358146725352, "learning_rate": 2.795043224298708e-06, "loss": 1.1044, "step": 5994 }, { "epoch": 2.0432856169052487, "grad_norm": 13.451975735746386, "learning_rate": 2.7932635624798943e-06, "loss": 1.6392, "step": 5995 }, { "epoch": 2.043626448534424, "grad_norm": 14.159872105660597, "learning_rate": 2.791484247789504e-06, "loss": 1.1043, "step": 5996 }, { "epoch": 2.0439672801635993, "grad_norm": 16.094392626913468, "learning_rate": 2.789705280507433e-06, "loss": 1.369, "step": 5997 }, { "epoch": 2.044308111792774, "grad_norm": 30.247163995405867, "learning_rate": 2.78792666091352e-06, "loss": 1.6155, "step": 5998 }, { "epoch": 2.0446489434219495, "grad_norm": 14.0390566112355, "learning_rate": 2.7861483892875495e-06, "loss": 1.6663, "step": 5999 }, { "epoch": 2.044989775051125, "grad_norm": 24.97327276724006, "learning_rate": 2.7843704659092525e-06, "loss": 1.6387, "step": 6000 }, { "epoch": 2.0453306066803, "grad_norm": 24.416186008371508, "learning_rate": 2.7825928910582966e-06, "loss": 1.7455, "step": 6001 }, { "epoch": 2.045671438309475, "grad_norm": 10.577153705337054, "learning_rate": 2.780815665014308e-06, "loss": 1.2173, "step": 6002 }, { "epoch": 2.0460122699386503, "grad_norm": 18.654629260731802, "learning_rate": 2.7790387880568517e-06, "loss": 1.5048, "step": 6003 }, { "epoch": 2.0463531015678256, "grad_norm": 15.156581903066163, "learning_rate": 2.77726226046543e-06, "loss": 1.7542, "step": 6004 }, { "epoch": 2.046693933197001, "grad_norm": 18.443219268211227, "learning_rate": 2.775486082519508e-06, "loss": 1.1971, "step": 6005 }, { "epoch": 2.0470347648261757, "grad_norm": 18.0147478288214, "learning_rate": 2.7737102544984784e-06, "loss": 1.3608, "step": 6006 }, { "epoch": 2.047375596455351, "grad_norm": 13.457844550865897, "learning_rate": 2.771934776681686e-06, "loss": 1.5057, "step": 6007 }, { "epoch": 2.0477164280845264, "grad_norm": 12.400843169819435, "learning_rate": 2.7701596493484264e-06, "loss": 1.4659, "step": 6008 }, { "epoch": 2.0480572597137012, "grad_norm": 19.25518660683634, "learning_rate": 2.7683848727779284e-06, "loss": 1.3134, "step": 6009 }, { "epoch": 2.0483980913428765, "grad_norm": 10.82375029106123, "learning_rate": 2.7666104472493714e-06, "loss": 0.9353, "step": 6010 }, { "epoch": 2.048738922972052, "grad_norm": 11.178447776794355, "learning_rate": 2.764836373041886e-06, "loss": 0.7616, "step": 6011 }, { "epoch": 2.049079754601227, "grad_norm": 15.693882628087273, "learning_rate": 2.7630626504345348e-06, "loss": 1.5932, "step": 6012 }, { "epoch": 2.049420586230402, "grad_norm": 12.485021824154034, "learning_rate": 2.7612892797063335e-06, "loss": 1.5816, "step": 6013 }, { "epoch": 2.0497614178595773, "grad_norm": 13.546241994834437, "learning_rate": 2.75951626113624e-06, "loss": 1.0455, "step": 6014 }, { "epoch": 2.0501022494887526, "grad_norm": 17.688580632893196, "learning_rate": 2.757743595003158e-06, "loss": 1.4917, "step": 6015 }, { "epoch": 2.050443081117928, "grad_norm": 13.344554766177442, "learning_rate": 2.755971281585934e-06, "loss": 1.2476, "step": 6016 }, { "epoch": 2.050783912747103, "grad_norm": 14.113661888596019, "learning_rate": 2.75419932116336e-06, "loss": 1.8507, "step": 6017 }, { "epoch": 2.051124744376278, "grad_norm": 17.021027433294623, "learning_rate": 2.752427714014173e-06, "loss": 0.9821, "step": 6018 }, { "epoch": 2.0514655760054534, "grad_norm": 12.953856401677186, "learning_rate": 2.750656460417054e-06, "loss": 1.4327, "step": 6019 }, { "epoch": 2.0518064076346283, "grad_norm": 13.560919437554038, "learning_rate": 2.7488855606506282e-06, "loss": 1.455, "step": 6020 }, { "epoch": 2.0521472392638036, "grad_norm": 14.799371585388036, "learning_rate": 2.7471150149934663e-06, "loss": 1.8687, "step": 6021 }, { "epoch": 2.052488070892979, "grad_norm": 11.344167565835725, "learning_rate": 2.7453448237240767e-06, "loss": 1.2249, "step": 6022 }, { "epoch": 2.052828902522154, "grad_norm": 25.00752750432449, "learning_rate": 2.743574987120924e-06, "loss": 0.8313, "step": 6023 }, { "epoch": 2.053169734151329, "grad_norm": 22.336977276250384, "learning_rate": 2.7418055054624094e-06, "loss": 1.6226, "step": 6024 }, { "epoch": 2.0535105657805044, "grad_norm": 12.99398817522244, "learning_rate": 2.7400363790268746e-06, "loss": 1.0414, "step": 6025 }, { "epoch": 2.0538513974096797, "grad_norm": 12.414071117350106, "learning_rate": 2.738267608092618e-06, "loss": 1.2922, "step": 6026 }, { "epoch": 2.054192229038855, "grad_norm": 19.809823975390476, "learning_rate": 2.7364991929378683e-06, "loss": 1.426, "step": 6027 }, { "epoch": 2.05453306066803, "grad_norm": 16.900231812489267, "learning_rate": 2.7347311338408056e-06, "loss": 1.3484, "step": 6028 }, { "epoch": 2.054873892297205, "grad_norm": 16.406799376966656, "learning_rate": 2.732963431079553e-06, "loss": 1.7921, "step": 6029 }, { "epoch": 2.0552147239263805, "grad_norm": 16.859931584052653, "learning_rate": 2.731196084932177e-06, "loss": 0.9953, "step": 6030 }, { "epoch": 2.0555555555555554, "grad_norm": 25.677850779193093, "learning_rate": 2.729429095676689e-06, "loss": 1.7094, "step": 6031 }, { "epoch": 2.0558963871847307, "grad_norm": 16.148017266392028, "learning_rate": 2.727662463591042e-06, "loss": 1.5772, "step": 6032 }, { "epoch": 2.056237218813906, "grad_norm": 14.751012811057285, "learning_rate": 2.725896188953136e-06, "loss": 1.7026, "step": 6033 }, { "epoch": 2.0565780504430813, "grad_norm": 28.78919166366769, "learning_rate": 2.7241302720408118e-06, "loss": 1.3724, "step": 6034 }, { "epoch": 2.056918882072256, "grad_norm": 15.802547797600617, "learning_rate": 2.722364713131856e-06, "loss": 1.9759, "step": 6035 }, { "epoch": 2.0572597137014315, "grad_norm": 11.466984607373803, "learning_rate": 2.720599512503997e-06, "loss": 1.3579, "step": 6036 }, { "epoch": 2.0576005453306068, "grad_norm": 14.21223671151292, "learning_rate": 2.7188346704349107e-06, "loss": 1.5336, "step": 6037 }, { "epoch": 2.057941376959782, "grad_norm": 13.292855534573198, "learning_rate": 2.7170701872022077e-06, "loss": 1.2358, "step": 6038 }, { "epoch": 2.058282208588957, "grad_norm": 20.041841894388508, "learning_rate": 2.7153060630834536e-06, "loss": 1.7245, "step": 6039 }, { "epoch": 2.0586230402181322, "grad_norm": 22.40448740227055, "learning_rate": 2.7135422983561526e-06, "loss": 1.5224, "step": 6040 }, { "epoch": 2.0589638718473076, "grad_norm": 12.039356046674603, "learning_rate": 2.711778893297746e-06, "loss": 1.2668, "step": 6041 }, { "epoch": 2.0593047034764824, "grad_norm": 11.956785772094253, "learning_rate": 2.710015848185633e-06, "loss": 1.6172, "step": 6042 }, { "epoch": 2.0596455351056577, "grad_norm": 18.209279678093335, "learning_rate": 2.7082531632971404e-06, "loss": 2.1429, "step": 6043 }, { "epoch": 2.059986366734833, "grad_norm": 15.899561881449745, "learning_rate": 2.706490838909547e-06, "loss": 1.6744, "step": 6044 }, { "epoch": 2.0603271983640083, "grad_norm": 17.72625694971961, "learning_rate": 2.704728875300077e-06, "loss": 1.3596, "step": 6045 }, { "epoch": 2.060668029993183, "grad_norm": 15.840347766720532, "learning_rate": 2.702967272745891e-06, "loss": 1.336, "step": 6046 }, { "epoch": 2.0610088616223585, "grad_norm": 17.273400544838758, "learning_rate": 2.701206031524096e-06, "loss": 0.9953, "step": 6047 }, { "epoch": 2.061349693251534, "grad_norm": 28.844992945399945, "learning_rate": 2.699445151911745e-06, "loss": 1.5225, "step": 6048 }, { "epoch": 2.061690524880709, "grad_norm": 24.2814212972102, "learning_rate": 2.6976846341858276e-06, "loss": 2.1037, "step": 6049 }, { "epoch": 2.062031356509884, "grad_norm": 18.645355438389213, "learning_rate": 2.695924478623283e-06, "loss": 1.6175, "step": 6050 }, { "epoch": 2.0623721881390593, "grad_norm": 11.683049665550653, "learning_rate": 2.6941646855009904e-06, "loss": 1.2154, "step": 6051 }, { "epoch": 2.0627130197682346, "grad_norm": 18.548292024839743, "learning_rate": 2.6924052550957714e-06, "loss": 1.6116, "step": 6052 }, { "epoch": 2.0630538513974095, "grad_norm": 16.74877859613324, "learning_rate": 2.6906461876843912e-06, "loss": 1.3394, "step": 6053 }, { "epoch": 2.063394683026585, "grad_norm": 14.535949513823127, "learning_rate": 2.6888874835435595e-06, "loss": 1.9623, "step": 6054 }, { "epoch": 2.06373551465576, "grad_norm": 12.37969535636808, "learning_rate": 2.6871291429499285e-06, "loss": 1.26, "step": 6055 }, { "epoch": 2.0640763462849354, "grad_norm": 15.788307552707177, "learning_rate": 2.6853711661800864e-06, "loss": 0.6226, "step": 6056 }, { "epoch": 2.0644171779141103, "grad_norm": 13.585270057528318, "learning_rate": 2.6836135535105757e-06, "loss": 1.4585, "step": 6057 }, { "epoch": 2.0647580095432856, "grad_norm": 22.546061525282823, "learning_rate": 2.681856305217876e-06, "loss": 1.2109, "step": 6058 }, { "epoch": 2.065098841172461, "grad_norm": 14.502815395974558, "learning_rate": 2.680099421578403e-06, "loss": 1.8548, "step": 6059 }, { "epoch": 2.065439672801636, "grad_norm": 20.275568642074553, "learning_rate": 2.6783429028685305e-06, "loss": 1.3786, "step": 6060 }, { "epoch": 2.065780504430811, "grad_norm": 56.41157065903689, "learning_rate": 2.676586749364559e-06, "loss": 1.2619, "step": 6061 }, { "epoch": 2.0661213360599864, "grad_norm": 14.766453962083215, "learning_rate": 2.6748309613427415e-06, "loss": 1.2012, "step": 6062 }, { "epoch": 2.0664621676891617, "grad_norm": 15.320608830606233, "learning_rate": 2.6730755390792693e-06, "loss": 1.8334, "step": 6063 }, { "epoch": 2.0668029993183366, "grad_norm": 19.485473023571114, "learning_rate": 2.671320482850278e-06, "loss": 1.2792, "step": 6064 }, { "epoch": 2.067143830947512, "grad_norm": 15.671138089881833, "learning_rate": 2.669565792931844e-06, "loss": 1.6214, "step": 6065 }, { "epoch": 2.067484662576687, "grad_norm": 10.77059076243292, "learning_rate": 2.6678114695999887e-06, "loss": 1.4354, "step": 6066 }, { "epoch": 2.0678254942058625, "grad_norm": 14.518618073019598, "learning_rate": 2.666057513130672e-06, "loss": 1.5965, "step": 6067 }, { "epoch": 2.0681663258350373, "grad_norm": 20.666993107282813, "learning_rate": 2.6643039237998003e-06, "loss": 2.0369, "step": 6068 }, { "epoch": 2.0685071574642127, "grad_norm": 12.800932947795168, "learning_rate": 2.6625507018832194e-06, "loss": 1.2945, "step": 6069 }, { "epoch": 2.068847989093388, "grad_norm": 17.017312271731605, "learning_rate": 2.6607978476567177e-06, "loss": 1.4761, "step": 6070 }, { "epoch": 2.0691888207225633, "grad_norm": 23.388578967087565, "learning_rate": 2.6590453613960277e-06, "loss": 1.9822, "step": 6071 }, { "epoch": 2.069529652351738, "grad_norm": 17.589177001248505, "learning_rate": 2.657293243376817e-06, "loss": 1.6955, "step": 6072 }, { "epoch": 2.0698704839809134, "grad_norm": 15.749406977705906, "learning_rate": 2.6555414938747084e-06, "loss": 1.5027, "step": 6073 }, { "epoch": 2.0702113156100888, "grad_norm": 17.47134661130064, "learning_rate": 2.6537901131652565e-06, "loss": 0.6588, "step": 6074 }, { "epoch": 2.0705521472392636, "grad_norm": 24.25734571791591, "learning_rate": 2.6520391015239556e-06, "loss": 1.6862, "step": 6075 }, { "epoch": 2.070892978868439, "grad_norm": 14.572952244637522, "learning_rate": 2.6502884592262544e-06, "loss": 1.6116, "step": 6076 }, { "epoch": 2.0712338104976142, "grad_norm": 14.083167645037026, "learning_rate": 2.6485381865475314e-06, "loss": 1.547, "step": 6077 }, { "epoch": 2.0715746421267895, "grad_norm": 19.117832908992483, "learning_rate": 2.646788283763109e-06, "loss": 1.4642, "step": 6078 }, { "epoch": 2.0719154737559644, "grad_norm": 19.033240760460487, "learning_rate": 2.645038751148262e-06, "loss": 1.2163, "step": 6079 }, { "epoch": 2.0722563053851397, "grad_norm": 13.465682340100447, "learning_rate": 2.643289588978192e-06, "loss": 0.9421, "step": 6080 }, { "epoch": 2.072597137014315, "grad_norm": 13.651312340434018, "learning_rate": 2.6415407975280506e-06, "loss": 1.3675, "step": 6081 }, { "epoch": 2.0729379686434903, "grad_norm": 10.917648731626931, "learning_rate": 2.6397923770729307e-06, "loss": 1.1248, "step": 6082 }, { "epoch": 2.073278800272665, "grad_norm": 32.789668369243, "learning_rate": 2.638044327887865e-06, "loss": 1.2036, "step": 6083 }, { "epoch": 2.0736196319018405, "grad_norm": 13.592693386183711, "learning_rate": 2.6362966502478287e-06, "loss": 1.3309, "step": 6084 }, { "epoch": 2.073960463531016, "grad_norm": 14.679608985673678, "learning_rate": 2.634549344427739e-06, "loss": 1.1382, "step": 6085 }, { "epoch": 2.0743012951601907, "grad_norm": 13.817074769864439, "learning_rate": 2.6328024107024542e-06, "loss": 1.6737, "step": 6086 }, { "epoch": 2.074642126789366, "grad_norm": 10.14321985410071, "learning_rate": 2.631055849346773e-06, "loss": 1.0175, "step": 6087 }, { "epoch": 2.0749829584185413, "grad_norm": 13.2557421548651, "learning_rate": 2.6293096606354363e-06, "loss": 1.3459, "step": 6088 }, { "epoch": 2.0753237900477166, "grad_norm": 14.582240981110093, "learning_rate": 2.62756384484313e-06, "loss": 1.4841, "step": 6089 }, { "epoch": 2.0756646216768915, "grad_norm": 15.478681318798388, "learning_rate": 2.625818402244471e-06, "loss": 1.2363, "step": 6090 }, { "epoch": 2.076005453306067, "grad_norm": 15.431197323057281, "learning_rate": 2.62407333311403e-06, "loss": 1.8195, "step": 6091 }, { "epoch": 2.076346284935242, "grad_norm": 14.152281696680708, "learning_rate": 2.622328637726314e-06, "loss": 1.466, "step": 6092 }, { "epoch": 2.0766871165644174, "grad_norm": 12.80147956868569, "learning_rate": 2.6205843163557663e-06, "loss": 1.1673, "step": 6093 }, { "epoch": 2.0770279481935923, "grad_norm": 10.96965224821824, "learning_rate": 2.6188403692767774e-06, "loss": 1.3429, "step": 6094 }, { "epoch": 2.0773687798227676, "grad_norm": 19.851841825610535, "learning_rate": 2.6170967967636774e-06, "loss": 1.2368, "step": 6095 }, { "epoch": 2.077709611451943, "grad_norm": 13.563789026178334, "learning_rate": 2.615353599090737e-06, "loss": 1.2335, "step": 6096 }, { "epoch": 2.0780504430811177, "grad_norm": 15.114462971027761, "learning_rate": 2.6136107765321683e-06, "loss": 0.9032, "step": 6097 }, { "epoch": 2.078391274710293, "grad_norm": 17.601266954428198, "learning_rate": 2.6118683293621237e-06, "loss": 1.4512, "step": 6098 }, { "epoch": 2.0787321063394684, "grad_norm": 14.304083719288617, "learning_rate": 2.610126257854698e-06, "loss": 1.7248, "step": 6099 }, { "epoch": 2.0790729379686437, "grad_norm": 16.826779020232355, "learning_rate": 2.608384562283925e-06, "loss": 1.3466, "step": 6100 }, { "epoch": 2.0794137695978185, "grad_norm": 15.05610091767622, "learning_rate": 2.606643242923781e-06, "loss": 1.5338, "step": 6101 }, { "epoch": 2.079754601226994, "grad_norm": 16.033104959977727, "learning_rate": 2.6049023000481823e-06, "loss": 1.6509, "step": 6102 }, { "epoch": 2.080095432856169, "grad_norm": 15.394587954610563, "learning_rate": 2.603161733930986e-06, "loss": 1.3887, "step": 6103 }, { "epoch": 2.0804362644853445, "grad_norm": 15.938537277164354, "learning_rate": 2.6014215448459907e-06, "loss": 1.7401, "step": 6104 }, { "epoch": 2.0807770961145193, "grad_norm": 14.552092212955108, "learning_rate": 2.5996817330669368e-06, "loss": 1.2871, "step": 6105 }, { "epoch": 2.0811179277436946, "grad_norm": 16.65300552767382, "learning_rate": 2.5979422988674965e-06, "loss": 1.4677, "step": 6106 }, { "epoch": 2.08145875937287, "grad_norm": 15.397524332764828, "learning_rate": 2.596203242521297e-06, "loss": 1.7931, "step": 6107 }, { "epoch": 2.081799591002045, "grad_norm": 18.038544111745388, "learning_rate": 2.5944645643018983e-06, "loss": 1.3578, "step": 6108 }, { "epoch": 2.08214042263122, "grad_norm": 27.227305110304606, "learning_rate": 2.5927262644827954e-06, "loss": 1.3397, "step": 6109 }, { "epoch": 2.0824812542603954, "grad_norm": 12.421361058109603, "learning_rate": 2.5909883433374385e-06, "loss": 1.2993, "step": 6110 }, { "epoch": 2.0828220858895707, "grad_norm": 13.945254123826569, "learning_rate": 2.5892508011392027e-06, "loss": 0.8619, "step": 6111 }, { "epoch": 2.0831629175187456, "grad_norm": 13.792160383919931, "learning_rate": 2.5875136381614097e-06, "loss": 1.6408, "step": 6112 }, { "epoch": 2.083503749147921, "grad_norm": 16.425379022122037, "learning_rate": 2.58577685467733e-06, "loss": 1.7851, "step": 6113 }, { "epoch": 2.0838445807770962, "grad_norm": 18.70950358103788, "learning_rate": 2.584040450960159e-06, "loss": 1.6459, "step": 6114 }, { "epoch": 2.084185412406271, "grad_norm": 25.937678828904225, "learning_rate": 2.5823044272830427e-06, "loss": 1.2769, "step": 6115 }, { "epoch": 2.0845262440354464, "grad_norm": 12.226877423438392, "learning_rate": 2.580568783919064e-06, "loss": 1.0606, "step": 6116 }, { "epoch": 2.0848670756646217, "grad_norm": 16.80550334720805, "learning_rate": 2.5788335211412475e-06, "loss": 1.3376, "step": 6117 }, { "epoch": 2.085207907293797, "grad_norm": 18.628170207955108, "learning_rate": 2.577098639222555e-06, "loss": 1.5323, "step": 6118 }, { "epoch": 2.085548738922972, "grad_norm": 15.828134393008575, "learning_rate": 2.575364138435893e-06, "loss": 1.6429, "step": 6119 }, { "epoch": 2.085889570552147, "grad_norm": 13.190746591572411, "learning_rate": 2.573630019054103e-06, "loss": 1.4296, "step": 6120 }, { "epoch": 2.0862304021813225, "grad_norm": 102.84232613873372, "learning_rate": 2.5718962813499697e-06, "loss": 1.6036, "step": 6121 }, { "epoch": 2.086571233810498, "grad_norm": 13.819700736769416, "learning_rate": 2.570162925596218e-06, "loss": 1.3789, "step": 6122 }, { "epoch": 2.0869120654396727, "grad_norm": 11.736756410628727, "learning_rate": 2.56842995206551e-06, "loss": 1.1257, "step": 6123 }, { "epoch": 2.087252897068848, "grad_norm": 19.180986913230555, "learning_rate": 2.5666973610304504e-06, "loss": 0.8372, "step": 6124 }, { "epoch": 2.0875937286980233, "grad_norm": 23.21173431691631, "learning_rate": 2.564965152763582e-06, "loss": 1.5141, "step": 6125 }, { "epoch": 2.087934560327198, "grad_norm": 14.984080398995618, "learning_rate": 2.56323332753739e-06, "loss": 1.5522, "step": 6126 }, { "epoch": 2.0882753919563735, "grad_norm": 11.564028323400402, "learning_rate": 2.5615018856242945e-06, "loss": 1.0661, "step": 6127 }, { "epoch": 2.0886162235855488, "grad_norm": 12.061514091321545, "learning_rate": 2.559770827296657e-06, "loss": 1.2072, "step": 6128 }, { "epoch": 2.088957055214724, "grad_norm": 15.393214033923492, "learning_rate": 2.5580401528267866e-06, "loss": 1.3872, "step": 6129 }, { "epoch": 2.089297886843899, "grad_norm": 21.11444477445145, "learning_rate": 2.5563098624869194e-06, "loss": 1.2574, "step": 6130 }, { "epoch": 2.0896387184730743, "grad_norm": 14.52942521148876, "learning_rate": 2.554579956549238e-06, "loss": 0.8967, "step": 6131 }, { "epoch": 2.0899795501022496, "grad_norm": 17.2646699837619, "learning_rate": 2.552850435285864e-06, "loss": 1.5475, "step": 6132 }, { "epoch": 2.090320381731425, "grad_norm": 16.95524370321703, "learning_rate": 2.5511212989688587e-06, "loss": 1.9298, "step": 6133 }, { "epoch": 2.0906612133605997, "grad_norm": 16.692066294098144, "learning_rate": 2.5493925478702216e-06, "loss": 1.3164, "step": 6134 }, { "epoch": 2.091002044989775, "grad_norm": 18.18426543837245, "learning_rate": 2.5476641822618912e-06, "loss": 1.6635, "step": 6135 }, { "epoch": 2.0913428766189504, "grad_norm": 11.194701611156594, "learning_rate": 2.5459362024157474e-06, "loss": 1.0975, "step": 6136 }, { "epoch": 2.091683708248125, "grad_norm": 15.250106149854359, "learning_rate": 2.544208608603609e-06, "loss": 1.5212, "step": 6137 }, { "epoch": 2.0920245398773005, "grad_norm": 12.93481805034485, "learning_rate": 2.542481401097231e-06, "loss": 1.4306, "step": 6138 }, { "epoch": 2.092365371506476, "grad_norm": 18.168586614664438, "learning_rate": 2.540754580168314e-06, "loss": 1.7038, "step": 6139 }, { "epoch": 2.092706203135651, "grad_norm": 33.07945385121587, "learning_rate": 2.5390281460884874e-06, "loss": 1.1514, "step": 6140 }, { "epoch": 2.093047034764826, "grad_norm": 14.269471215201243, "learning_rate": 2.5373020991293317e-06, "loss": 1.3484, "step": 6141 }, { "epoch": 2.0933878663940013, "grad_norm": 16.395249593298892, "learning_rate": 2.535576439562362e-06, "loss": 1.9696, "step": 6142 }, { "epoch": 2.0937286980231766, "grad_norm": 11.696485683479517, "learning_rate": 2.533851167659024e-06, "loss": 1.577, "step": 6143 }, { "epoch": 2.094069529652352, "grad_norm": 16.25595397449943, "learning_rate": 2.5321262836907177e-06, "loss": 1.5297, "step": 6144 }, { "epoch": 2.094410361281527, "grad_norm": 15.14408564607669, "learning_rate": 2.53040178792877e-06, "loss": 1.5605, "step": 6145 }, { "epoch": 2.094751192910702, "grad_norm": 21.54468077867544, "learning_rate": 2.52867768064445e-06, "loss": 1.4567, "step": 6146 }, { "epoch": 2.0950920245398774, "grad_norm": 13.162594712674546, "learning_rate": 2.5269539621089722e-06, "loss": 1.8168, "step": 6147 }, { "epoch": 2.0954328561690523, "grad_norm": 14.926812246013798, "learning_rate": 2.525230632593479e-06, "loss": 1.3898, "step": 6148 }, { "epoch": 2.0957736877982276, "grad_norm": 17.962923445232153, "learning_rate": 2.5235076923690586e-06, "loss": 2.0, "step": 6149 }, { "epoch": 2.096114519427403, "grad_norm": 12.51456168018112, "learning_rate": 2.5217851417067367e-06, "loss": 1.7192, "step": 6150 }, { "epoch": 2.096455351056578, "grad_norm": 15.538872697580905, "learning_rate": 2.520062980877477e-06, "loss": 1.5963, "step": 6151 }, { "epoch": 2.096796182685753, "grad_norm": 14.662384162596243, "learning_rate": 2.5183412101521833e-06, "loss": 1.4479, "step": 6152 }, { "epoch": 2.0971370143149284, "grad_norm": 20.562380753543863, "learning_rate": 2.516619829801695e-06, "loss": 1.4838, "step": 6153 }, { "epoch": 2.0974778459441037, "grad_norm": 13.266682426061363, "learning_rate": 2.5148988400967946e-06, "loss": 0.8552, "step": 6154 }, { "epoch": 2.097818677573279, "grad_norm": 22.366027180919325, "learning_rate": 2.5131782413081977e-06, "loss": 1.9986, "step": 6155 }, { "epoch": 2.098159509202454, "grad_norm": 12.396119150253158, "learning_rate": 2.511458033706564e-06, "loss": 1.5425, "step": 6156 }, { "epoch": 2.098500340831629, "grad_norm": 16.81921093321046, "learning_rate": 2.5097382175624874e-06, "loss": 1.7593, "step": 6157 }, { "epoch": 2.0988411724608045, "grad_norm": 16.619739109704298, "learning_rate": 2.5080187931465044e-06, "loss": 1.5801, "step": 6158 }, { "epoch": 2.0991820040899793, "grad_norm": 18.402601181166105, "learning_rate": 2.506299760729081e-06, "loss": 1.8661, "step": 6159 }, { "epoch": 2.0995228357191547, "grad_norm": 16.024685125915806, "learning_rate": 2.504581120580637e-06, "loss": 1.7912, "step": 6160 }, { "epoch": 2.09986366734833, "grad_norm": 14.484908768766035, "learning_rate": 2.502862872971513e-06, "loss": 1.8996, "step": 6161 }, { "epoch": 2.1002044989775053, "grad_norm": 17.181903053383806, "learning_rate": 2.5011450181719986e-06, "loss": 1.4511, "step": 6162 }, { "epoch": 2.10054533060668, "grad_norm": 23.966972993226904, "learning_rate": 2.4994275564523247e-06, "loss": 1.5346, "step": 6163 }, { "epoch": 2.1008861622358554, "grad_norm": 16.37616271068201, "learning_rate": 2.4977104880826473e-06, "loss": 1.1504, "step": 6164 }, { "epoch": 2.1012269938650308, "grad_norm": 15.562064829282981, "learning_rate": 2.495993813333072e-06, "loss": 1.4629, "step": 6165 }, { "epoch": 2.101567825494206, "grad_norm": 14.720441695349686, "learning_rate": 2.494277532473637e-06, "loss": 0.7938, "step": 6166 }, { "epoch": 2.101908657123381, "grad_norm": 16.000533015867244, "learning_rate": 2.4925616457743213e-06, "loss": 1.6223, "step": 6167 }, { "epoch": 2.1022494887525562, "grad_norm": 11.686294472381654, "learning_rate": 2.49084615350504e-06, "loss": 1.5167, "step": 6168 }, { "epoch": 2.1025903203817315, "grad_norm": 14.724419173929975, "learning_rate": 2.4891310559356475e-06, "loss": 1.7129, "step": 6169 }, { "epoch": 2.1029311520109064, "grad_norm": 18.170503164137894, "learning_rate": 2.4874163533359346e-06, "loss": 2.2063, "step": 6170 }, { "epoch": 2.1032719836400817, "grad_norm": 13.07394594438088, "learning_rate": 2.4857020459756315e-06, "loss": 1.59, "step": 6171 }, { "epoch": 2.103612815269257, "grad_norm": 15.257974337679112, "learning_rate": 2.4839881341244052e-06, "loss": 1.4643, "step": 6172 }, { "epoch": 2.1039536468984323, "grad_norm": 19.077513556372793, "learning_rate": 2.482274618051864e-06, "loss": 1.8137, "step": 6173 }, { "epoch": 2.104294478527607, "grad_norm": 12.788193917193889, "learning_rate": 2.480561498027543e-06, "loss": 1.4631, "step": 6174 }, { "epoch": 2.1046353101567825, "grad_norm": 10.515969771645617, "learning_rate": 2.4788487743209298e-06, "loss": 1.2403, "step": 6175 }, { "epoch": 2.104976141785958, "grad_norm": 17.287429260340407, "learning_rate": 2.4771364472014424e-06, "loss": 1.3456, "step": 6176 }, { "epoch": 2.105316973415133, "grad_norm": 10.390262782763132, "learning_rate": 2.4754245169384312e-06, "loss": 1.1472, "step": 6177 }, { "epoch": 2.105657805044308, "grad_norm": 10.512995494317291, "learning_rate": 2.473712983801197e-06, "loss": 1.0583, "step": 6178 }, { "epoch": 2.1059986366734833, "grad_norm": 22.069748460228656, "learning_rate": 2.4720018480589647e-06, "loss": 1.4847, "step": 6179 }, { "epoch": 2.1063394683026586, "grad_norm": 16.557476666612466, "learning_rate": 2.4702911099809037e-06, "loss": 1.6198, "step": 6180 }, { "epoch": 2.1066802999318335, "grad_norm": 12.089123404391028, "learning_rate": 2.468580769836125e-06, "loss": 1.5066, "step": 6181 }, { "epoch": 2.107021131561009, "grad_norm": 17.398349067031955, "learning_rate": 2.466870827893666e-06, "loss": 1.3319, "step": 6182 }, { "epoch": 2.107361963190184, "grad_norm": 18.86077421489594, "learning_rate": 2.4651612844225096e-06, "loss": 1.3355, "step": 6183 }, { "epoch": 2.1077027948193594, "grad_norm": 16.583216478293142, "learning_rate": 2.4634521396915744e-06, "loss": 1.6671, "step": 6184 }, { "epoch": 2.1080436264485343, "grad_norm": 16.07739769556006, "learning_rate": 2.4617433939697145e-06, "loss": 1.622, "step": 6185 }, { "epoch": 2.1083844580777096, "grad_norm": 18.322705651788223, "learning_rate": 2.4600350475257228e-06, "loss": 1.454, "step": 6186 }, { "epoch": 2.108725289706885, "grad_norm": 18.998029407520956, "learning_rate": 2.4583271006283293e-06, "loss": 1.8137, "step": 6187 }, { "epoch": 2.10906612133606, "grad_norm": 19.264387821616577, "learning_rate": 2.456619553546201e-06, "loss": 1.3773, "step": 6188 }, { "epoch": 2.109406952965235, "grad_norm": 12.891160070422545, "learning_rate": 2.4549124065479413e-06, "loss": 1.4889, "step": 6189 }, { "epoch": 2.1097477845944104, "grad_norm": 15.677101250559057, "learning_rate": 2.4532056599020913e-06, "loss": 1.0845, "step": 6190 }, { "epoch": 2.1100886162235857, "grad_norm": 16.928932569219533, "learning_rate": 2.45149931387713e-06, "loss": 1.3542, "step": 6191 }, { "epoch": 2.1104294478527605, "grad_norm": 20.07106231808636, "learning_rate": 2.4497933687414726e-06, "loss": 1.0626, "step": 6192 }, { "epoch": 2.110770279481936, "grad_norm": 10.159122480182205, "learning_rate": 2.4480878247634666e-06, "loss": 1.1576, "step": 6193 }, { "epoch": 2.111111111111111, "grad_norm": 17.750598568470892, "learning_rate": 2.446382682211408e-06, "loss": 1.3528, "step": 6194 }, { "epoch": 2.1114519427402865, "grad_norm": 12.254837350061026, "learning_rate": 2.4446779413535172e-06, "loss": 1.109, "step": 6195 }, { "epoch": 2.1117927743694613, "grad_norm": 15.344193710365971, "learning_rate": 2.442973602457956e-06, "loss": 1.6399, "step": 6196 }, { "epoch": 2.1121336059986366, "grad_norm": 15.731890695320631, "learning_rate": 2.4412696657928302e-06, "loss": 1.4775, "step": 6197 }, { "epoch": 2.112474437627812, "grad_norm": 13.690758351425206, "learning_rate": 2.43956613162617e-06, "loss": 0.8961, "step": 6198 }, { "epoch": 2.1128152692569873, "grad_norm": 43.80055795937283, "learning_rate": 2.4378630002259495e-06, "loss": 1.3721, "step": 6199 }, { "epoch": 2.113156100886162, "grad_norm": 27.556066548751897, "learning_rate": 2.4361602718600786e-06, "loss": 1.5982, "step": 6200 }, { "epoch": 2.1134969325153374, "grad_norm": 14.588278483913742, "learning_rate": 2.4344579467964023e-06, "loss": 1.3398, "step": 6201 }, { "epoch": 2.1138377641445127, "grad_norm": 13.978777830892287, "learning_rate": 2.4327560253027044e-06, "loss": 1.5702, "step": 6202 }, { "epoch": 2.1141785957736876, "grad_norm": 16.26471225776593, "learning_rate": 2.4310545076467035e-06, "loss": 1.4277, "step": 6203 }, { "epoch": 2.114519427402863, "grad_norm": 13.284290223879385, "learning_rate": 2.4293533940960545e-06, "loss": 1.1142, "step": 6204 }, { "epoch": 2.1148602590320382, "grad_norm": 12.88971980470685, "learning_rate": 2.42765268491835e-06, "loss": 1.9101, "step": 6205 }, { "epoch": 2.1152010906612135, "grad_norm": 16.949406234675738, "learning_rate": 2.425952380381118e-06, "loss": 1.4044, "step": 6206 }, { "epoch": 2.1155419222903884, "grad_norm": 17.68506171012958, "learning_rate": 2.4242524807518254e-06, "loss": 1.9936, "step": 6207 }, { "epoch": 2.1158827539195637, "grad_norm": 13.088714802518906, "learning_rate": 2.422552986297867e-06, "loss": 0.9028, "step": 6208 }, { "epoch": 2.116223585548739, "grad_norm": 14.857121157685297, "learning_rate": 2.420853897286586e-06, "loss": 1.0284, "step": 6209 }, { "epoch": 2.116564417177914, "grad_norm": 13.993194576467637, "learning_rate": 2.4191552139852553e-06, "loss": 1.6916, "step": 6210 }, { "epoch": 2.116905248807089, "grad_norm": 15.1254759990384, "learning_rate": 2.41745693666108e-06, "loss": 1.2018, "step": 6211 }, { "epoch": 2.1172460804362645, "grad_norm": 19.305121783747566, "learning_rate": 2.415759065581213e-06, "loss": 1.2828, "step": 6212 }, { "epoch": 2.11758691206544, "grad_norm": 16.72236711560976, "learning_rate": 2.41406160101273e-06, "loss": 2.0018, "step": 6213 }, { "epoch": 2.1179277436946147, "grad_norm": 12.793291147294214, "learning_rate": 2.4123645432226496e-06, "loss": 1.2676, "step": 6214 }, { "epoch": 2.11826857532379, "grad_norm": 11.126144617602678, "learning_rate": 2.4106678924779308e-06, "loss": 1.1408, "step": 6215 }, { "epoch": 2.1186094069529653, "grad_norm": 12.635092231867176, "learning_rate": 2.4089716490454583e-06, "loss": 1.3615, "step": 6216 }, { "epoch": 2.1189502385821406, "grad_norm": 13.257218623140941, "learning_rate": 2.4072758131920576e-06, "loss": 1.4355, "step": 6217 }, { "epoch": 2.1192910702113155, "grad_norm": 29.672470108662683, "learning_rate": 2.4055803851844967e-06, "loss": 1.3088, "step": 6218 }, { "epoch": 2.1196319018404908, "grad_norm": 12.286279397293868, "learning_rate": 2.4038853652894673e-06, "loss": 1.5109, "step": 6219 }, { "epoch": 2.119972733469666, "grad_norm": 11.652715837303564, "learning_rate": 2.402190753773604e-06, "loss": 1.271, "step": 6220 }, { "epoch": 2.120313565098841, "grad_norm": 16.916275278310973, "learning_rate": 2.400496550903476e-06, "loss": 1.3403, "step": 6221 }, { "epoch": 2.1206543967280163, "grad_norm": 13.10917799421779, "learning_rate": 2.3988027569455895e-06, "loss": 1.4327, "step": 6222 }, { "epoch": 2.1209952283571916, "grad_norm": 10.65546535078804, "learning_rate": 2.3971093721663852e-06, "loss": 1.127, "step": 6223 }, { "epoch": 2.121336059986367, "grad_norm": 12.51169225601301, "learning_rate": 2.3954163968322346e-06, "loss": 1.4258, "step": 6224 }, { "epoch": 2.1216768916155417, "grad_norm": 29.685097632061186, "learning_rate": 2.3937238312094554e-06, "loss": 2.1042, "step": 6225 }, { "epoch": 2.122017723244717, "grad_norm": 17.567812470614864, "learning_rate": 2.392031675564294e-06, "loss": 1.6751, "step": 6226 }, { "epoch": 2.1223585548738924, "grad_norm": 10.974633101430381, "learning_rate": 2.390339930162928e-06, "loss": 1.1781, "step": 6227 }, { "epoch": 2.1226993865030677, "grad_norm": 19.98576680277969, "learning_rate": 2.3886485952714835e-06, "loss": 1.8475, "step": 6228 }, { "epoch": 2.1230402181322425, "grad_norm": 16.97256159937134, "learning_rate": 2.386957671156008e-06, "loss": 1.7491, "step": 6229 }, { "epoch": 2.123381049761418, "grad_norm": 12.771383893763836, "learning_rate": 2.3852671580824908e-06, "loss": 1.2656, "step": 6230 }, { "epoch": 2.123721881390593, "grad_norm": 13.855489950224936, "learning_rate": 2.3835770563168625e-06, "loss": 1.5997, "step": 6231 }, { "epoch": 2.124062713019768, "grad_norm": 16.841554799089216, "learning_rate": 2.3818873661249765e-06, "loss": 1.7535, "step": 6232 }, { "epoch": 2.1244035446489433, "grad_norm": 20.213317921092244, "learning_rate": 2.380198087772629e-06, "loss": 1.3475, "step": 6233 }, { "epoch": 2.1247443762781186, "grad_norm": 12.420917076683667, "learning_rate": 2.3785092215255522e-06, "loss": 1.3549, "step": 6234 }, { "epoch": 2.125085207907294, "grad_norm": 18.469946030124525, "learning_rate": 2.3768207676494103e-06, "loss": 0.9973, "step": 6235 }, { "epoch": 2.125426039536469, "grad_norm": 13.730821541766405, "learning_rate": 2.375132726409804e-06, "loss": 1.2372, "step": 6236 }, { "epoch": 2.125766871165644, "grad_norm": 15.79464174477173, "learning_rate": 2.3734450980722683e-06, "loss": 0.9658, "step": 6237 }, { "epoch": 2.1261077027948194, "grad_norm": 12.068761785746005, "learning_rate": 2.3717578829022745e-06, "loss": 1.564, "step": 6238 }, { "epoch": 2.1264485344239947, "grad_norm": 23.589222344719598, "learning_rate": 2.370071081165228e-06, "loss": 1.5356, "step": 6239 }, { "epoch": 2.1267893660531696, "grad_norm": 13.054733303692117, "learning_rate": 2.3683846931264704e-06, "loss": 1.5985, "step": 6240 }, { "epoch": 2.127130197682345, "grad_norm": 14.190838531019866, "learning_rate": 2.366698719051276e-06, "loss": 1.2889, "step": 6241 }, { "epoch": 2.12747102931152, "grad_norm": 12.482070505129741, "learning_rate": 2.365013159204855e-06, "loss": 1.4937, "step": 6242 }, { "epoch": 2.127811860940695, "grad_norm": 12.995386860002094, "learning_rate": 2.3633280138523546e-06, "loss": 1.5395, "step": 6243 }, { "epoch": 2.1281526925698704, "grad_norm": 19.66703662457556, "learning_rate": 2.361643283258855e-06, "loss": 1.8207, "step": 6244 }, { "epoch": 2.1284935241990457, "grad_norm": 14.462138820477406, "learning_rate": 2.359958967689365e-06, "loss": 1.3038, "step": 6245 }, { "epoch": 2.128834355828221, "grad_norm": 11.673307518889073, "learning_rate": 2.358275067408842e-06, "loss": 1.2909, "step": 6246 }, { "epoch": 2.129175187457396, "grad_norm": 15.599454766297297, "learning_rate": 2.356591582682168e-06, "loss": 1.696, "step": 6247 }, { "epoch": 2.129516019086571, "grad_norm": 13.802286377834122, "learning_rate": 2.354908513774157e-06, "loss": 1.4262, "step": 6248 }, { "epoch": 2.1298568507157465, "grad_norm": 15.06588519217504, "learning_rate": 2.3532258609495705e-06, "loss": 1.3545, "step": 6249 }, { "epoch": 2.130197682344922, "grad_norm": 13.90662270364312, "learning_rate": 2.35154362447309e-06, "loss": 1.7569, "step": 6250 }, { "epoch": 2.1305385139740967, "grad_norm": 15.898575723083995, "learning_rate": 2.3498618046093386e-06, "loss": 1.4392, "step": 6251 }, { "epoch": 2.130879345603272, "grad_norm": 9.928039767891752, "learning_rate": 2.3481804016228786e-06, "loss": 1.583, "step": 6252 }, { "epoch": 2.1312201772324473, "grad_norm": 14.562711113301557, "learning_rate": 2.3464994157781963e-06, "loss": 1.6195, "step": 6253 }, { "epoch": 2.131561008861622, "grad_norm": 14.767750533405469, "learning_rate": 2.344818847339718e-06, "loss": 1.6509, "step": 6254 }, { "epoch": 2.1319018404907975, "grad_norm": 13.61315302375698, "learning_rate": 2.3431386965718055e-06, "loss": 0.9522, "step": 6255 }, { "epoch": 2.1322426721199728, "grad_norm": 10.272598688221903, "learning_rate": 2.341458963738752e-06, "loss": 1.2024, "step": 6256 }, { "epoch": 2.132583503749148, "grad_norm": 15.202405018658306, "learning_rate": 2.3397796491047888e-06, "loss": 1.7147, "step": 6257 }, { "epoch": 2.132924335378323, "grad_norm": 15.968633579294407, "learning_rate": 2.3381007529340722e-06, "loss": 1.7109, "step": 6258 }, { "epoch": 2.1332651670074982, "grad_norm": 10.025612632097607, "learning_rate": 2.3364222754907055e-06, "loss": 1.06, "step": 6259 }, { "epoch": 2.1336059986366736, "grad_norm": 13.981421572049484, "learning_rate": 2.33474421703872e-06, "loss": 0.8805, "step": 6260 }, { "epoch": 2.133946830265849, "grad_norm": 18.235821659677384, "learning_rate": 2.333066577842075e-06, "loss": 1.5388, "step": 6261 }, { "epoch": 2.1342876618950237, "grad_norm": 16.995087273691716, "learning_rate": 2.331389358164678e-06, "loss": 1.3812, "step": 6262 }, { "epoch": 2.134628493524199, "grad_norm": 9.942863732571093, "learning_rate": 2.3297125582703555e-06, "loss": 1.2258, "step": 6263 }, { "epoch": 2.1349693251533743, "grad_norm": 9.809511492783678, "learning_rate": 2.3280361784228763e-06, "loss": 1.3287, "step": 6264 }, { "epoch": 2.135310156782549, "grad_norm": 26.895011767179426, "learning_rate": 2.326360218885946e-06, "loss": 0.8062, "step": 6265 }, { "epoch": 2.1356509884117245, "grad_norm": 14.662533446715448, "learning_rate": 2.3246846799231947e-06, "loss": 1.2633, "step": 6266 }, { "epoch": 2.1359918200409, "grad_norm": 12.815674344514166, "learning_rate": 2.323009561798193e-06, "loss": 1.1422, "step": 6267 }, { "epoch": 2.136332651670075, "grad_norm": 9.504201943448528, "learning_rate": 2.321334864774444e-06, "loss": 1.182, "step": 6268 }, { "epoch": 2.13667348329925, "grad_norm": 16.282251812498473, "learning_rate": 2.3196605891153833e-06, "loss": 1.812, "step": 6269 }, { "epoch": 2.1370143149284253, "grad_norm": 12.887684177123699, "learning_rate": 2.317986735084382e-06, "loss": 1.1858, "step": 6270 }, { "epoch": 2.1373551465576006, "grad_norm": 18.444917160783973, "learning_rate": 2.3163133029447436e-06, "loss": 1.6392, "step": 6271 }, { "epoch": 2.137695978186776, "grad_norm": 11.305627500062021, "learning_rate": 2.314640292959706e-06, "loss": 1.3338, "step": 6272 }, { "epoch": 2.138036809815951, "grad_norm": 19.619635121942686, "learning_rate": 2.3129677053924403e-06, "loss": 1.861, "step": 6273 }, { "epoch": 2.138377641445126, "grad_norm": 19.612489523792817, "learning_rate": 2.3112955405060506e-06, "loss": 0.7991, "step": 6274 }, { "epoch": 2.1387184730743014, "grad_norm": 15.318109156177933, "learning_rate": 2.3096237985635755e-06, "loss": 1.2076, "step": 6275 }, { "epoch": 2.1390593047034763, "grad_norm": 16.77206580719733, "learning_rate": 2.3079524798279873e-06, "loss": 1.4533, "step": 6276 }, { "epoch": 2.1394001363326516, "grad_norm": 13.052388427426955, "learning_rate": 2.30628158456219e-06, "loss": 1.1748, "step": 6277 }, { "epoch": 2.139740967961827, "grad_norm": 17.27878922728916, "learning_rate": 2.3046111130290243e-06, "loss": 1.7726, "step": 6278 }, { "epoch": 2.140081799591002, "grad_norm": 20.219241755780768, "learning_rate": 2.3029410654912565e-06, "loss": 2.0337, "step": 6279 }, { "epoch": 2.140422631220177, "grad_norm": 18.240753696612053, "learning_rate": 2.301271442211597e-06, "loss": 0.8721, "step": 6280 }, { "epoch": 2.1407634628493524, "grad_norm": 13.795361592816544, "learning_rate": 2.2996022434526855e-06, "loss": 1.3986, "step": 6281 }, { "epoch": 2.1411042944785277, "grad_norm": 16.087069566736456, "learning_rate": 2.297933469477086e-06, "loss": 1.5859, "step": 6282 }, { "epoch": 2.141445126107703, "grad_norm": 13.92242835108406, "learning_rate": 2.2962651205473125e-06, "loss": 1.1575, "step": 6283 }, { "epoch": 2.141785957736878, "grad_norm": 18.097211086291246, "learning_rate": 2.2945971969257964e-06, "loss": 1.452, "step": 6284 }, { "epoch": 2.142126789366053, "grad_norm": 16.579712323263976, "learning_rate": 2.292929698874909e-06, "loss": 1.6616, "step": 6285 }, { "epoch": 2.1424676209952285, "grad_norm": 18.07252041260663, "learning_rate": 2.2912626266569597e-06, "loss": 1.8844, "step": 6286 }, { "epoch": 2.1428084526244033, "grad_norm": 16.699224813486076, "learning_rate": 2.2895959805341804e-06, "loss": 1.4822, "step": 6287 }, { "epoch": 2.1431492842535786, "grad_norm": 15.914625477684664, "learning_rate": 2.2879297607687426e-06, "loss": 0.7948, "step": 6288 }, { "epoch": 2.143490115882754, "grad_norm": 23.769411993802635, "learning_rate": 2.28626396762275e-06, "loss": 1.0411, "step": 6289 }, { "epoch": 2.1438309475119293, "grad_norm": 17.081205802169393, "learning_rate": 2.2845986013582378e-06, "loss": 1.4327, "step": 6290 }, { "epoch": 2.144171779141104, "grad_norm": 14.407140109767882, "learning_rate": 2.2829336622371765e-06, "loss": 1.5771, "step": 6291 }, { "epoch": 2.1445126107702794, "grad_norm": 13.585470377239261, "learning_rate": 2.2812691505214633e-06, "loss": 1.6108, "step": 6292 }, { "epoch": 2.1448534423994547, "grad_norm": 18.459699905032366, "learning_rate": 2.2796050664729367e-06, "loss": 1.6478, "step": 6293 }, { "epoch": 2.14519427402863, "grad_norm": 16.075895915678075, "learning_rate": 2.2779414103533643e-06, "loss": 1.7872, "step": 6294 }, { "epoch": 2.145535105657805, "grad_norm": 22.32423674791049, "learning_rate": 2.27627818242444e-06, "loss": 1.0986, "step": 6295 }, { "epoch": 2.1458759372869802, "grad_norm": 16.330445635158526, "learning_rate": 2.274615382947804e-06, "loss": 1.503, "step": 6296 }, { "epoch": 2.1462167689161555, "grad_norm": 19.648189219501635, "learning_rate": 2.2729530121850156e-06, "loss": 1.214, "step": 6297 }, { "epoch": 2.1465576005453304, "grad_norm": 12.93311872242047, "learning_rate": 2.271291070397573e-06, "loss": 1.1899, "step": 6298 }, { "epoch": 2.1468984321745057, "grad_norm": 18.985718527153487, "learning_rate": 2.2696295578469095e-06, "loss": 1.0277, "step": 6299 }, { "epoch": 2.147239263803681, "grad_norm": 18.714783555084452, "learning_rate": 2.2679684747943847e-06, "loss": 1.6059, "step": 6300 }, { "epoch": 2.1475800954328563, "grad_norm": 18.255945653161383, "learning_rate": 2.2663078215012945e-06, "loss": 1.4174, "step": 6301 }, { "epoch": 2.147920927062031, "grad_norm": 12.614656157512782, "learning_rate": 2.2646475982288657e-06, "loss": 1.0649, "step": 6302 }, { "epoch": 2.1482617586912065, "grad_norm": 17.920239449294478, "learning_rate": 2.262987805238259e-06, "loss": 1.1351, "step": 6303 }, { "epoch": 2.148602590320382, "grad_norm": 14.603666728953467, "learning_rate": 2.261328442790566e-06, "loss": 1.2174, "step": 6304 }, { "epoch": 2.148943421949557, "grad_norm": 14.555673222096987, "learning_rate": 2.2596695111468105e-06, "loss": 1.0307, "step": 6305 }, { "epoch": 2.149284253578732, "grad_norm": 16.56069429707057, "learning_rate": 2.2580110105679493e-06, "loss": 1.822, "step": 6306 }, { "epoch": 2.1496250852079073, "grad_norm": 18.193067309226375, "learning_rate": 2.2563529413148718e-06, "loss": 2.019, "step": 6307 }, { "epoch": 2.1499659168370826, "grad_norm": 29.56503869616025, "learning_rate": 2.254695303648398e-06, "loss": 1.694, "step": 6308 }, { "epoch": 2.1503067484662575, "grad_norm": 14.371603787806224, "learning_rate": 2.253038097829282e-06, "loss": 1.1391, "step": 6309 }, { "epoch": 2.1506475800954328, "grad_norm": 15.74568257949557, "learning_rate": 2.2513813241182083e-06, "loss": 1.1577, "step": 6310 }, { "epoch": 2.150988411724608, "grad_norm": 13.805279977428047, "learning_rate": 2.2497249827757933e-06, "loss": 1.0711, "step": 6311 }, { "epoch": 2.1513292433537834, "grad_norm": 15.430223100886565, "learning_rate": 2.248069074062589e-06, "loss": 1.798, "step": 6312 }, { "epoch": 2.1516700749829583, "grad_norm": 16.055859525457173, "learning_rate": 2.2464135982390706e-06, "loss": 1.1075, "step": 6313 }, { "epoch": 2.1520109066121336, "grad_norm": 18.115216636453244, "learning_rate": 2.244758555565656e-06, "loss": 1.4734, "step": 6314 }, { "epoch": 2.152351738241309, "grad_norm": 12.401228940552873, "learning_rate": 2.2431039463026905e-06, "loss": 1.4334, "step": 6315 }, { "epoch": 2.152692569870484, "grad_norm": 18.352552767460196, "learning_rate": 2.2414497707104452e-06, "loss": 1.536, "step": 6316 }, { "epoch": 2.153033401499659, "grad_norm": 17.58302222505736, "learning_rate": 2.239796029049136e-06, "loss": 1.8262, "step": 6317 }, { "epoch": 2.1533742331288344, "grad_norm": 14.190271732944463, "learning_rate": 2.238142721578897e-06, "loss": 1.0323, "step": 6318 }, { "epoch": 2.1537150647580097, "grad_norm": 12.862328883682183, "learning_rate": 2.236489848559801e-06, "loss": 1.4266, "step": 6319 }, { "epoch": 2.1540558963871845, "grad_norm": 12.906559852559852, "learning_rate": 2.2348374102518557e-06, "loss": 1.6675, "step": 6320 }, { "epoch": 2.15439672801636, "grad_norm": 10.214590012011454, "learning_rate": 2.2331854069149915e-06, "loss": 1.0753, "step": 6321 }, { "epoch": 2.154737559645535, "grad_norm": 27.434029352500055, "learning_rate": 2.231533838809077e-06, "loss": 1.6348, "step": 6322 }, { "epoch": 2.1550783912747105, "grad_norm": 13.111384881797928, "learning_rate": 2.2298827061939106e-06, "loss": 1.1765, "step": 6323 }, { "epoch": 2.1554192229038853, "grad_norm": 19.889375046734504, "learning_rate": 2.228232009329221e-06, "loss": 1.7678, "step": 6324 }, { "epoch": 2.1557600545330606, "grad_norm": 11.892246934634473, "learning_rate": 2.226581748474673e-06, "loss": 1.4445, "step": 6325 }, { "epoch": 2.156100886162236, "grad_norm": 15.438553570371866, "learning_rate": 2.2249319238898517e-06, "loss": 1.6629, "step": 6326 }, { "epoch": 2.1564417177914113, "grad_norm": 15.707225574518112, "learning_rate": 2.2232825358342875e-06, "loss": 1.8786, "step": 6327 }, { "epoch": 2.156782549420586, "grad_norm": 16.563688564829164, "learning_rate": 2.2216335845674357e-06, "loss": 1.4408, "step": 6328 }, { "epoch": 2.1571233810497614, "grad_norm": 15.144408240444719, "learning_rate": 2.219985070348678e-06, "loss": 1.7102, "step": 6329 }, { "epoch": 2.1574642126789367, "grad_norm": 14.794781650821832, "learning_rate": 2.2183369934373365e-06, "loss": 1.5902, "step": 6330 }, { "epoch": 2.1578050443081116, "grad_norm": 27.486051435949136, "learning_rate": 2.2166893540926616e-06, "loss": 1.386, "step": 6331 }, { "epoch": 2.158145875937287, "grad_norm": 11.646321489375513, "learning_rate": 2.215042152573827e-06, "loss": 1.4574, "step": 6332 }, { "epoch": 2.158486707566462, "grad_norm": 20.523745214290635, "learning_rate": 2.213395389139952e-06, "loss": 1.0599, "step": 6333 }, { "epoch": 2.1588275391956375, "grad_norm": 12.162412670277105, "learning_rate": 2.2117490640500732e-06, "loss": 1.1046, "step": 6334 }, { "epoch": 2.1591683708248124, "grad_norm": 10.15939306205793, "learning_rate": 2.2101031775631636e-06, "loss": 1.0846, "step": 6335 }, { "epoch": 2.1595092024539877, "grad_norm": 16.770820483153624, "learning_rate": 2.2084577299381343e-06, "loss": 1.6056, "step": 6336 }, { "epoch": 2.159850034083163, "grad_norm": 19.05263620079242, "learning_rate": 2.2068127214338148e-06, "loss": 1.9478, "step": 6337 }, { "epoch": 2.1601908657123383, "grad_norm": 10.879126531796443, "learning_rate": 2.2051681523089736e-06, "loss": 1.1732, "step": 6338 }, { "epoch": 2.160531697341513, "grad_norm": 15.928616613597537, "learning_rate": 2.203524022822307e-06, "loss": 1.7771, "step": 6339 }, { "epoch": 2.1608725289706885, "grad_norm": 12.442965519365064, "learning_rate": 2.2018803332324434e-06, "loss": 1.249, "step": 6340 }, { "epoch": 2.161213360599864, "grad_norm": 9.440013962507871, "learning_rate": 2.2002370837979432e-06, "loss": 0.8019, "step": 6341 }, { "epoch": 2.1615541922290387, "grad_norm": 13.91365538960959, "learning_rate": 2.1985942747772935e-06, "loss": 1.2259, "step": 6342 }, { "epoch": 2.161895023858214, "grad_norm": 15.122917761091601, "learning_rate": 2.1969519064289173e-06, "loss": 1.3791, "step": 6343 }, { "epoch": 2.1622358554873893, "grad_norm": 18.443374420930702, "learning_rate": 2.195309979011164e-06, "loss": 0.8839, "step": 6344 }, { "epoch": 2.1625766871165646, "grad_norm": 14.890768047774994, "learning_rate": 2.193668492782316e-06, "loss": 1.4203, "step": 6345 }, { "epoch": 2.1629175187457395, "grad_norm": 12.387266316386958, "learning_rate": 2.192027448000587e-06, "loss": 1.0671, "step": 6346 }, { "epoch": 2.1632583503749148, "grad_norm": 16.951911721876236, "learning_rate": 2.190386844924115e-06, "loss": 2.0304, "step": 6347 }, { "epoch": 2.16359918200409, "grad_norm": 12.644114860147614, "learning_rate": 2.1887466838109785e-06, "loss": 1.415, "step": 6348 }, { "epoch": 2.1639400136332654, "grad_norm": 11.46502464980983, "learning_rate": 2.187106964919181e-06, "loss": 1.5237, "step": 6349 }, { "epoch": 2.1642808452624402, "grad_norm": 18.669099735779355, "learning_rate": 2.185467688506652e-06, "loss": 0.6629, "step": 6350 }, { "epoch": 2.1646216768916156, "grad_norm": 13.895882754611515, "learning_rate": 2.1838288548312637e-06, "loss": 1.4994, "step": 6351 }, { "epoch": 2.164962508520791, "grad_norm": 17.44497346450304, "learning_rate": 2.182190464150805e-06, "loss": 1.7141, "step": 6352 }, { "epoch": 2.1653033401499657, "grad_norm": 35.473048109376705, "learning_rate": 2.180552516723003e-06, "loss": 2.3086, "step": 6353 }, { "epoch": 2.165644171779141, "grad_norm": 12.247473240347823, "learning_rate": 2.178915012805514e-06, "loss": 1.1235, "step": 6354 }, { "epoch": 2.1659850034083163, "grad_norm": 14.248074463799268, "learning_rate": 2.1772779526559236e-06, "loss": 1.8417, "step": 6355 }, { "epoch": 2.1663258350374917, "grad_norm": 14.923766009925679, "learning_rate": 2.1756413365317474e-06, "loss": 1.1367, "step": 6356 }, { "epoch": 2.1666666666666665, "grad_norm": 13.147271408064467, "learning_rate": 2.1740051646904326e-06, "loss": 1.3994, "step": 6357 }, { "epoch": 2.167007498295842, "grad_norm": 17.034188150863276, "learning_rate": 2.172369437389355e-06, "loss": 1.0711, "step": 6358 }, { "epoch": 2.167348329925017, "grad_norm": 40.54572385022194, "learning_rate": 2.1707341548858205e-06, "loss": 1.6721, "step": 6359 }, { "epoch": 2.1676891615541924, "grad_norm": 15.880501905552856, "learning_rate": 2.1690993174370665e-06, "loss": 1.9122, "step": 6360 }, { "epoch": 2.1680299931833673, "grad_norm": 10.12951013549337, "learning_rate": 2.1674649253002593e-06, "loss": 1.2556, "step": 6361 }, { "epoch": 2.1683708248125426, "grad_norm": 19.040057214766268, "learning_rate": 2.1658309787324967e-06, "loss": 1.8297, "step": 6362 }, { "epoch": 2.168711656441718, "grad_norm": 14.913006274703895, "learning_rate": 2.1641974779907994e-06, "loss": 1.7444, "step": 6363 }, { "epoch": 2.169052488070893, "grad_norm": 13.30040764449652, "learning_rate": 2.1625644233321293e-06, "loss": 1.0295, "step": 6364 }, { "epoch": 2.169393319700068, "grad_norm": 19.081520281653173, "learning_rate": 2.1609318150133724e-06, "loss": 1.4962, "step": 6365 }, { "epoch": 2.1697341513292434, "grad_norm": 21.013875133402273, "learning_rate": 2.1592996532913397e-06, "loss": 1.7716, "step": 6366 }, { "epoch": 2.1700749829584187, "grad_norm": 15.380617361130238, "learning_rate": 2.1576679384227826e-06, "loss": 1.7069, "step": 6367 }, { "epoch": 2.1704158145875936, "grad_norm": 15.914795853630052, "learning_rate": 2.1560366706643716e-06, "loss": 1.9748, "step": 6368 }, { "epoch": 2.170756646216769, "grad_norm": 12.65551143407503, "learning_rate": 2.1544058502727123e-06, "loss": 1.3196, "step": 6369 }, { "epoch": 2.171097477845944, "grad_norm": 16.574190277886533, "learning_rate": 2.1527754775043436e-06, "loss": 1.3115, "step": 6370 }, { "epoch": 2.1714383094751195, "grad_norm": 17.60910812495016, "learning_rate": 2.1511455526157244e-06, "loss": 1.8509, "step": 6371 }, { "epoch": 2.1717791411042944, "grad_norm": 20.32275941425562, "learning_rate": 2.149516075863251e-06, "loss": 2.0637, "step": 6372 }, { "epoch": 2.1721199727334697, "grad_norm": 22.21748755382518, "learning_rate": 2.1478870475032455e-06, "loss": 1.2435, "step": 6373 }, { "epoch": 2.172460804362645, "grad_norm": 15.584038186028081, "learning_rate": 2.1462584677919613e-06, "loss": 1.2235, "step": 6374 }, { "epoch": 2.17280163599182, "grad_norm": 18.042211102614484, "learning_rate": 2.1446303369855797e-06, "loss": 1.6339, "step": 6375 }, { "epoch": 2.173142467620995, "grad_norm": 16.092096332836586, "learning_rate": 2.1430026553402134e-06, "loss": 1.1807, "step": 6376 }, { "epoch": 2.1734832992501705, "grad_norm": 16.679976149027365, "learning_rate": 2.141375423111902e-06, "loss": 2.1089, "step": 6377 }, { "epoch": 2.173824130879346, "grad_norm": 16.138266883323823, "learning_rate": 2.1397486405566166e-06, "loss": 1.6286, "step": 6378 }, { "epoch": 2.1741649625085206, "grad_norm": 12.251775688133684, "learning_rate": 2.1381223079302552e-06, "loss": 0.9213, "step": 6379 }, { "epoch": 2.174505794137696, "grad_norm": 15.13970053395009, "learning_rate": 2.1364964254886493e-06, "loss": 1.68, "step": 6380 }, { "epoch": 2.1748466257668713, "grad_norm": 12.311160746203376, "learning_rate": 2.1348709934875515e-06, "loss": 1.2173, "step": 6381 }, { "epoch": 2.1751874573960466, "grad_norm": 13.461168962281546, "learning_rate": 2.133246012182653e-06, "loss": 1.774, "step": 6382 }, { "epoch": 2.1755282890252214, "grad_norm": 28.50722205803135, "learning_rate": 2.131621481829571e-06, "loss": 1.6572, "step": 6383 }, { "epoch": 2.1758691206543967, "grad_norm": 16.231245992178028, "learning_rate": 2.1299974026838445e-06, "loss": 1.2849, "step": 6384 }, { "epoch": 2.176209952283572, "grad_norm": 12.086588333278401, "learning_rate": 2.128373775000955e-06, "loss": 1.4896, "step": 6385 }, { "epoch": 2.176550783912747, "grad_norm": 16.000258389633625, "learning_rate": 2.1267505990363e-06, "loss": 1.7448, "step": 6386 }, { "epoch": 2.1768916155419222, "grad_norm": 10.39648952375022, "learning_rate": 2.1251278750452136e-06, "loss": 1.2213, "step": 6387 }, { "epoch": 2.1772324471710975, "grad_norm": 16.641957976682935, "learning_rate": 2.123505603282957e-06, "loss": 2.1339, "step": 6388 }, { "epoch": 2.177573278800273, "grad_norm": 12.738859915713475, "learning_rate": 2.1218837840047193e-06, "loss": 1.1774, "step": 6389 }, { "epoch": 2.1779141104294477, "grad_norm": 19.918346723825316, "learning_rate": 2.1202624174656193e-06, "loss": 1.5249, "step": 6390 }, { "epoch": 2.178254942058623, "grad_norm": 17.152471415543843, "learning_rate": 2.118641503920705e-06, "loss": 1.5149, "step": 6391 }, { "epoch": 2.1785957736877983, "grad_norm": 16.199835621974575, "learning_rate": 2.1170210436249524e-06, "loss": 1.7361, "step": 6392 }, { "epoch": 2.1789366053169736, "grad_norm": 15.449693780672083, "learning_rate": 2.1154010368332658e-06, "loss": 1.3685, "step": 6393 }, { "epoch": 2.1792774369461485, "grad_norm": 15.51537089435563, "learning_rate": 2.113781483800479e-06, "loss": 1.5615, "step": 6394 }, { "epoch": 2.179618268575324, "grad_norm": 12.883989491980175, "learning_rate": 2.112162384781354e-06, "loss": 1.6957, "step": 6395 }, { "epoch": 2.179959100204499, "grad_norm": 17.756139577547703, "learning_rate": 2.110543740030583e-06, "loss": 1.1287, "step": 6396 }, { "epoch": 2.180299931833674, "grad_norm": 16.143448533018617, "learning_rate": 2.1089255498027815e-06, "loss": 1.4959, "step": 6397 }, { "epoch": 2.1806407634628493, "grad_norm": 16.432544628109877, "learning_rate": 2.107307814352501e-06, "loss": 0.9615, "step": 6398 }, { "epoch": 2.1809815950920246, "grad_norm": 17.095368777712203, "learning_rate": 2.1056905339342183e-06, "loss": 1.5621, "step": 6399 }, { "epoch": 2.1813224267212, "grad_norm": 39.38023358002484, "learning_rate": 2.1040737088023323e-06, "loss": 1.4668, "step": 6400 }, { "epoch": 2.181663258350375, "grad_norm": 13.391953166726896, "learning_rate": 2.102457339211184e-06, "loss": 1.544, "step": 6401 }, { "epoch": 2.18200408997955, "grad_norm": 19.3288018789814, "learning_rate": 2.1008414254150285e-06, "loss": 1.6265, "step": 6402 }, { "epoch": 2.1823449216087254, "grad_norm": 10.271870858974413, "learning_rate": 2.099225967668056e-06, "loss": 1.1742, "step": 6403 }, { "epoch": 2.1826857532379007, "grad_norm": 14.110353664663606, "learning_rate": 2.09761096622439e-06, "loss": 1.3137, "step": 6404 }, { "epoch": 2.1830265848670756, "grad_norm": 15.943071250867334, "learning_rate": 2.0959964213380707e-06, "loss": 1.2619, "step": 6405 }, { "epoch": 2.183367416496251, "grad_norm": 13.945995099717063, "learning_rate": 2.094382333263074e-06, "loss": 0.9038, "step": 6406 }, { "epoch": 2.183708248125426, "grad_norm": 13.748988116044995, "learning_rate": 2.0927687022533034e-06, "loss": 1.6231, "step": 6407 }, { "epoch": 2.184049079754601, "grad_norm": 17.17819409058399, "learning_rate": 2.091155528562589e-06, "loss": 1.7235, "step": 6408 }, { "epoch": 2.1843899113837764, "grad_norm": 17.71343140503629, "learning_rate": 2.0895428124446893e-06, "loss": 1.5126, "step": 6409 }, { "epoch": 2.1847307430129517, "grad_norm": 15.536612152165866, "learning_rate": 2.0879305541532914e-06, "loss": 1.3987, "step": 6410 }, { "epoch": 2.185071574642127, "grad_norm": 15.93570038352703, "learning_rate": 2.0863187539420086e-06, "loss": 1.4199, "step": 6411 }, { "epoch": 2.185412406271302, "grad_norm": 12.485677181701922, "learning_rate": 2.084707412064386e-06, "loss": 1.2827, "step": 6412 }, { "epoch": 2.185753237900477, "grad_norm": 16.684775772608106, "learning_rate": 2.0830965287738914e-06, "loss": 1.3299, "step": 6413 }, { "epoch": 2.1860940695296525, "grad_norm": 15.051046825273373, "learning_rate": 2.0814861043239265e-06, "loss": 1.7206, "step": 6414 }, { "epoch": 2.1864349011588278, "grad_norm": 15.458450275371668, "learning_rate": 2.079876138967811e-06, "loss": 1.3315, "step": 6415 }, { "epoch": 2.1867757327880026, "grad_norm": 15.367257372051684, "learning_rate": 2.0782666329588063e-06, "loss": 1.2881, "step": 6416 }, { "epoch": 2.187116564417178, "grad_norm": 15.12405283754705, "learning_rate": 2.076657586550092e-06, "loss": 1.6269, "step": 6417 }, { "epoch": 2.1874573960463533, "grad_norm": 11.620793368235901, "learning_rate": 2.075048999994775e-06, "loss": 1.296, "step": 6418 }, { "epoch": 2.187798227675528, "grad_norm": 12.452814330984637, "learning_rate": 2.073440873545894e-06, "loss": 1.5389, "step": 6419 }, { "epoch": 2.1881390593047034, "grad_norm": 13.026583857583542, "learning_rate": 2.0718332074564128e-06, "loss": 0.6218, "step": 6420 }, { "epoch": 2.1884798909338787, "grad_norm": 18.631756281772184, "learning_rate": 2.0702260019792254e-06, "loss": 1.0899, "step": 6421 }, { "epoch": 2.188820722563054, "grad_norm": 19.98006429638065, "learning_rate": 2.0686192573671497e-06, "loss": 1.7855, "step": 6422 }, { "epoch": 2.189161554192229, "grad_norm": 11.588220349307065, "learning_rate": 2.0670129738729346e-06, "loss": 1.1143, "step": 6423 }, { "epoch": 2.189502385821404, "grad_norm": 15.782831900499858, "learning_rate": 2.065407151749254e-06, "loss": 1.7666, "step": 6424 }, { "epoch": 2.1898432174505795, "grad_norm": 19.735154725975136, "learning_rate": 2.0638017912487103e-06, "loss": 2.3097, "step": 6425 }, { "epoch": 2.190184049079755, "grad_norm": 19.697308639032492, "learning_rate": 2.0621968926238335e-06, "loss": 1.6927, "step": 6426 }, { "epoch": 2.1905248807089297, "grad_norm": 15.292764729678327, "learning_rate": 2.0605924561270803e-06, "loss": 1.5597, "step": 6427 }, { "epoch": 2.190865712338105, "grad_norm": 18.87214337617264, "learning_rate": 2.058988482010834e-06, "loss": 1.9717, "step": 6428 }, { "epoch": 2.1912065439672803, "grad_norm": 13.311963100096081, "learning_rate": 2.057384970527408e-06, "loss": 1.3603, "step": 6429 }, { "epoch": 2.191547375596455, "grad_norm": 16.86011369358606, "learning_rate": 2.055781921929041e-06, "loss": 1.1594, "step": 6430 }, { "epoch": 2.1918882072256305, "grad_norm": 13.874629789662377, "learning_rate": 2.054179336467895e-06, "loss": 1.9579, "step": 6431 }, { "epoch": 2.192229038854806, "grad_norm": 11.897802095194654, "learning_rate": 2.052577214396067e-06, "loss": 1.1727, "step": 6432 }, { "epoch": 2.192569870483981, "grad_norm": 12.908782141407919, "learning_rate": 2.0509755559655785e-06, "loss": 1.4319, "step": 6433 }, { "epoch": 2.192910702113156, "grad_norm": 11.85418597997109, "learning_rate": 2.049374361428371e-06, "loss": 0.9313, "step": 6434 }, { "epoch": 2.1932515337423313, "grad_norm": 14.012202622661121, "learning_rate": 2.0477736310363257e-06, "loss": 1.4235, "step": 6435 }, { "epoch": 2.1935923653715066, "grad_norm": 12.573472144784116, "learning_rate": 2.0461733650412385e-06, "loss": 1.2747, "step": 6436 }, { "epoch": 2.1939331970006815, "grad_norm": 15.23020182682256, "learning_rate": 2.044573563694838e-06, "loss": 1.0825, "step": 6437 }, { "epoch": 2.1942740286298568, "grad_norm": 14.774945170242264, "learning_rate": 2.0429742272487833e-06, "loss": 1.0683, "step": 6438 }, { "epoch": 2.194614860259032, "grad_norm": 15.84066270419507, "learning_rate": 2.041375355954653e-06, "loss": 1.3354, "step": 6439 }, { "epoch": 2.1949556918882074, "grad_norm": 17.284715698514063, "learning_rate": 2.039776950063957e-06, "loss": 1.244, "step": 6440 }, { "epoch": 2.1952965235173822, "grad_norm": 13.345719247077106, "learning_rate": 2.03817900982813e-06, "loss": 1.6978, "step": 6441 }, { "epoch": 2.1956373551465576, "grad_norm": 14.180599566631459, "learning_rate": 2.036581535498536e-06, "loss": 1.4292, "step": 6442 }, { "epoch": 2.195978186775733, "grad_norm": 14.59112866912174, "learning_rate": 2.0349845273264625e-06, "loss": 1.4585, "step": 6443 }, { "epoch": 2.196319018404908, "grad_norm": 16.30304737634381, "learning_rate": 2.0333879855631255e-06, "loss": 1.901, "step": 6444 }, { "epoch": 2.196659850034083, "grad_norm": 17.04383123899611, "learning_rate": 2.0317919104596685e-06, "loss": 1.3474, "step": 6445 }, { "epoch": 2.1970006816632583, "grad_norm": 17.635220869074534, "learning_rate": 2.0301963022671593e-06, "loss": 1.1978, "step": 6446 }, { "epoch": 2.1973415132924337, "grad_norm": 18.44514853040743, "learning_rate": 2.0286011612365947e-06, "loss": 1.5502, "step": 6447 }, { "epoch": 2.1976823449216085, "grad_norm": 13.503229030792665, "learning_rate": 2.027006487618895e-06, "loss": 1.2718, "step": 6448 }, { "epoch": 2.198023176550784, "grad_norm": 22.536046635772795, "learning_rate": 2.0254122816649095e-06, "loss": 1.3576, "step": 6449 }, { "epoch": 2.198364008179959, "grad_norm": 16.67582392371029, "learning_rate": 2.0238185436254136e-06, "loss": 1.987, "step": 6450 }, { "epoch": 2.1987048398091344, "grad_norm": 21.345995398979394, "learning_rate": 2.0222252737511107e-06, "loss": 0.9905, "step": 6451 }, { "epoch": 2.1990456714383093, "grad_norm": 12.239400146121898, "learning_rate": 2.0206324722926234e-06, "loss": 1.9195, "step": 6452 }, { "epoch": 2.1993865030674846, "grad_norm": 13.65888724735172, "learning_rate": 2.019040139500507e-06, "loss": 1.4334, "step": 6453 }, { "epoch": 2.19972733469666, "grad_norm": 15.883565609858454, "learning_rate": 2.0174482756252465e-06, "loss": 1.6715, "step": 6454 }, { "epoch": 2.2000681663258352, "grad_norm": 17.25538765312383, "learning_rate": 2.0158568809172434e-06, "loss": 1.433, "step": 6455 }, { "epoch": 2.20040899795501, "grad_norm": 18.399164329610336, "learning_rate": 2.0142659556268325e-06, "loss": 0.9613, "step": 6456 }, { "epoch": 2.2007498295841854, "grad_norm": 14.627464525230518, "learning_rate": 2.012675500004272e-06, "loss": 1.2869, "step": 6457 }, { "epoch": 2.2010906612133607, "grad_norm": 19.11536646793278, "learning_rate": 2.011085514299747e-06, "loss": 1.736, "step": 6458 }, { "epoch": 2.2014314928425356, "grad_norm": 13.106918586112219, "learning_rate": 2.009495998763369e-06, "loss": 1.2206, "step": 6459 }, { "epoch": 2.201772324471711, "grad_norm": 19.36996854911595, "learning_rate": 2.0079069536451744e-06, "loss": 1.8391, "step": 6460 }, { "epoch": 2.202113156100886, "grad_norm": 18.09341913371272, "learning_rate": 2.006318379195127e-06, "loss": 1.6575, "step": 6461 }, { "epoch": 2.2024539877300615, "grad_norm": 11.865955872365587, "learning_rate": 2.0047302756631165e-06, "loss": 1.2597, "step": 6462 }, { "epoch": 2.2027948193592364, "grad_norm": 13.595200795880697, "learning_rate": 2.0031426432989566e-06, "loss": 1.753, "step": 6463 }, { "epoch": 2.2031356509884117, "grad_norm": 17.879354813875253, "learning_rate": 2.0015554823523904e-06, "loss": 1.9127, "step": 6464 }, { "epoch": 2.203476482617587, "grad_norm": 14.70358075999326, "learning_rate": 1.999968793073079e-06, "loss": 1.6472, "step": 6465 }, { "epoch": 2.2038173142467623, "grad_norm": 14.11642947962428, "learning_rate": 1.998382575710622e-06, "loss": 1.4666, "step": 6466 }, { "epoch": 2.204158145875937, "grad_norm": 10.780782530061988, "learning_rate": 1.996796830514535e-06, "loss": 1.1249, "step": 6467 }, { "epoch": 2.2044989775051125, "grad_norm": 14.13623041862937, "learning_rate": 1.9952115577342584e-06, "loss": 1.0541, "step": 6468 }, { "epoch": 2.204839809134288, "grad_norm": 23.483169914290396, "learning_rate": 1.993626757619169e-06, "loss": 1.8315, "step": 6469 }, { "epoch": 2.2051806407634627, "grad_norm": 13.238950035767267, "learning_rate": 1.9920424304185564e-06, "loss": 0.7205, "step": 6470 }, { "epoch": 2.205521472392638, "grad_norm": 12.203623783738196, "learning_rate": 1.9904585763816416e-06, "loss": 1.5955, "step": 6471 }, { "epoch": 2.2058623040218133, "grad_norm": 14.18030250162095, "learning_rate": 1.988875195757577e-06, "loss": 1.4373, "step": 6472 }, { "epoch": 2.2062031356509886, "grad_norm": 8.660776631691814, "learning_rate": 1.9872922887954283e-06, "loss": 1.0617, "step": 6473 }, { "epoch": 2.2065439672801634, "grad_norm": 10.212027473864431, "learning_rate": 1.9857098557441957e-06, "loss": 1.2248, "step": 6474 }, { "epoch": 2.2068847989093388, "grad_norm": 14.756276743058947, "learning_rate": 1.9841278968528023e-06, "loss": 1.6293, "step": 6475 }, { "epoch": 2.207225630538514, "grad_norm": 18.953617319206217, "learning_rate": 1.9825464123700957e-06, "loss": 1.7257, "step": 6476 }, { "epoch": 2.2075664621676894, "grad_norm": 40.5485771978725, "learning_rate": 1.9809654025448504e-06, "loss": 1.5301, "step": 6477 }, { "epoch": 2.2079072937968642, "grad_norm": 19.266794591612474, "learning_rate": 1.9793848676257654e-06, "loss": 1.037, "step": 6478 }, { "epoch": 2.2082481254260395, "grad_norm": 12.154146021072064, "learning_rate": 1.9778048078614652e-06, "loss": 1.3016, "step": 6479 }, { "epoch": 2.208588957055215, "grad_norm": 15.957899876355057, "learning_rate": 1.976225223500499e-06, "loss": 0.8797, "step": 6480 }, { "epoch": 2.2089297886843897, "grad_norm": 15.520924038882065, "learning_rate": 1.9746461147913425e-06, "loss": 1.3537, "step": 6481 }, { "epoch": 2.209270620313565, "grad_norm": 12.469371318419638, "learning_rate": 1.9730674819823955e-06, "loss": 1.3363, "step": 6482 }, { "epoch": 2.2096114519427403, "grad_norm": 16.067927289497664, "learning_rate": 1.9714893253219846e-06, "loss": 1.1516, "step": 6483 }, { "epoch": 2.2099522835719156, "grad_norm": 37.46801090564371, "learning_rate": 1.9699116450583557e-06, "loss": 1.944, "step": 6484 }, { "epoch": 2.2102931152010905, "grad_norm": 19.95398404261129, "learning_rate": 1.96833444143969e-06, "loss": 2.3612, "step": 6485 }, { "epoch": 2.210633946830266, "grad_norm": 12.940627792476441, "learning_rate": 1.966757714714083e-06, "loss": 1.3862, "step": 6486 }, { "epoch": 2.210974778459441, "grad_norm": 40.64430524180224, "learning_rate": 1.9651814651295607e-06, "loss": 1.5898, "step": 6487 }, { "epoch": 2.211315610088616, "grad_norm": 9.458303138233253, "learning_rate": 1.9636056929340787e-06, "loss": 1.263, "step": 6488 }, { "epoch": 2.2116564417177913, "grad_norm": 15.711740164787109, "learning_rate": 1.962030398375506e-06, "loss": 1.724, "step": 6489 }, { "epoch": 2.2119972733469666, "grad_norm": 13.718245982110663, "learning_rate": 1.960455581701646e-06, "loss": 1.4039, "step": 6490 }, { "epoch": 2.212338104976142, "grad_norm": 14.651702908462436, "learning_rate": 1.9588812431602216e-06, "loss": 1.7011, "step": 6491 }, { "epoch": 2.212678936605317, "grad_norm": 18.66852543012691, "learning_rate": 1.957307382998884e-06, "loss": 1.4043, "step": 6492 }, { "epoch": 2.213019768234492, "grad_norm": 15.478193013825067, "learning_rate": 1.9557340014652076e-06, "loss": 1.7288, "step": 6493 }, { "epoch": 2.2133605998636674, "grad_norm": 12.569206646060323, "learning_rate": 1.9541610988066913e-06, "loss": 1.249, "step": 6494 }, { "epoch": 2.2137014314928427, "grad_norm": 16.626633553507556, "learning_rate": 1.952588675270759e-06, "loss": 1.1383, "step": 6495 }, { "epoch": 2.2140422631220176, "grad_norm": 17.970293185044994, "learning_rate": 1.9510167311047588e-06, "loss": 1.6964, "step": 6496 }, { "epoch": 2.214383094751193, "grad_norm": 12.073703458347495, "learning_rate": 1.9494452665559645e-06, "loss": 1.1157, "step": 6497 }, { "epoch": 2.214723926380368, "grad_norm": 27.137770780891103, "learning_rate": 1.9478742818715747e-06, "loss": 1.045, "step": 6498 }, { "epoch": 2.215064758009543, "grad_norm": 14.669512390597491, "learning_rate": 1.9463037772987065e-06, "loss": 1.5316, "step": 6499 }, { "epoch": 2.2154055896387184, "grad_norm": 13.199730797803864, "learning_rate": 1.9447337530844125e-06, "loss": 1.221, "step": 6500 }, { "epoch": 2.2157464212678937, "grad_norm": 12.447744324948069, "learning_rate": 1.943164209475663e-06, "loss": 1.5117, "step": 6501 }, { "epoch": 2.216087252897069, "grad_norm": 12.97123653004605, "learning_rate": 1.9415951467193484e-06, "loss": 1.3554, "step": 6502 }, { "epoch": 2.216428084526244, "grad_norm": 12.567080263412016, "learning_rate": 1.9400265650622952e-06, "loss": 1.3284, "step": 6503 }, { "epoch": 2.216768916155419, "grad_norm": 12.790705954569662, "learning_rate": 1.9384584647512427e-06, "loss": 1.3197, "step": 6504 }, { "epoch": 2.2171097477845945, "grad_norm": 29.907757798630694, "learning_rate": 1.9368908460328595e-06, "loss": 1.6387, "step": 6505 }, { "epoch": 2.2174505794137698, "grad_norm": 13.035993807296924, "learning_rate": 1.9353237091537435e-06, "loss": 1.4484, "step": 6506 }, { "epoch": 2.2177914110429446, "grad_norm": 16.373808456331588, "learning_rate": 1.9337570543604056e-06, "loss": 1.5215, "step": 6507 }, { "epoch": 2.21813224267212, "grad_norm": 18.245489748991968, "learning_rate": 1.932190881899289e-06, "loss": 1.4155, "step": 6508 }, { "epoch": 2.2184730743012953, "grad_norm": 14.717471221205411, "learning_rate": 1.930625192016759e-06, "loss": 1.0236, "step": 6509 }, { "epoch": 2.21881390593047, "grad_norm": 11.515759628138111, "learning_rate": 1.929059984959105e-06, "loss": 1.2485, "step": 6510 }, { "epoch": 2.2191547375596454, "grad_norm": 15.360978071685393, "learning_rate": 1.92749526097254e-06, "loss": 1.2809, "step": 6511 }, { "epoch": 2.2194955691888207, "grad_norm": 11.409868419742354, "learning_rate": 1.9259310203032017e-06, "loss": 1.2296, "step": 6512 }, { "epoch": 2.219836400817996, "grad_norm": 10.987449250183968, "learning_rate": 1.9243672631971504e-06, "loss": 1.1509, "step": 6513 }, { "epoch": 2.220177232447171, "grad_norm": 17.431247476551846, "learning_rate": 1.9228039899003727e-06, "loss": 1.6348, "step": 6514 }, { "epoch": 2.220518064076346, "grad_norm": 12.338911222271015, "learning_rate": 1.921241200658776e-06, "loss": 1.5794, "step": 6515 }, { "epoch": 2.2208588957055215, "grad_norm": 14.532102731099304, "learning_rate": 1.919678895718195e-06, "loss": 1.1922, "step": 6516 }, { "epoch": 2.221199727334697, "grad_norm": 15.597162273870454, "learning_rate": 1.918117075324388e-06, "loss": 1.4518, "step": 6517 }, { "epoch": 2.2215405589638717, "grad_norm": 17.961680704497134, "learning_rate": 1.916555739723029e-06, "loss": 1.5674, "step": 6518 }, { "epoch": 2.221881390593047, "grad_norm": 16.55423249143276, "learning_rate": 1.91499488915973e-06, "loss": 1.2792, "step": 6519 }, { "epoch": 2.2222222222222223, "grad_norm": 15.23481547948054, "learning_rate": 1.913434523880014e-06, "loss": 1.3796, "step": 6520 }, { "epoch": 2.222563053851397, "grad_norm": 20.475637293451495, "learning_rate": 1.911874644129333e-06, "loss": 1.8474, "step": 6521 }, { "epoch": 2.2229038854805725, "grad_norm": 15.583183737449264, "learning_rate": 1.910315250153067e-06, "loss": 1.8963, "step": 6522 }, { "epoch": 2.223244717109748, "grad_norm": 14.575491941897035, "learning_rate": 1.9087563421965096e-06, "loss": 1.6655, "step": 6523 }, { "epoch": 2.223585548738923, "grad_norm": 11.047610225651493, "learning_rate": 1.9071979205048846e-06, "loss": 1.3768, "step": 6524 }, { "epoch": 2.223926380368098, "grad_norm": 14.601209344730876, "learning_rate": 1.9056399853233393e-06, "loss": 1.754, "step": 6525 }, { "epoch": 2.2242672119972733, "grad_norm": 22.96785851678997, "learning_rate": 1.9040825368969417e-06, "loss": 1.9469, "step": 6526 }, { "epoch": 2.2246080436264486, "grad_norm": 15.343330816281096, "learning_rate": 1.9025255754706851e-06, "loss": 1.8913, "step": 6527 }, { "epoch": 2.224948875255624, "grad_norm": 10.454000577564434, "learning_rate": 1.900969101289486e-06, "loss": 1.0823, "step": 6528 }, { "epoch": 2.2252897068847988, "grad_norm": 20.491338080746253, "learning_rate": 1.8994131145981843e-06, "loss": 1.3297, "step": 6529 }, { "epoch": 2.225630538513974, "grad_norm": 15.052290501737469, "learning_rate": 1.8978576156415419e-06, "loss": 1.3864, "step": 6530 }, { "epoch": 2.2259713701431494, "grad_norm": 40.095346596565456, "learning_rate": 1.8963026046642452e-06, "loss": 1.7226, "step": 6531 }, { "epoch": 2.2263122017723243, "grad_norm": 12.167137513195536, "learning_rate": 1.894748081910906e-06, "loss": 1.6773, "step": 6532 }, { "epoch": 2.2266530334014996, "grad_norm": 16.39952733244793, "learning_rate": 1.8931940476260512e-06, "loss": 1.7749, "step": 6533 }, { "epoch": 2.226993865030675, "grad_norm": 19.734943475469816, "learning_rate": 1.8916405020541417e-06, "loss": 0.9451, "step": 6534 }, { "epoch": 2.22733469665985, "grad_norm": 19.70249466153235, "learning_rate": 1.8900874454395574e-06, "loss": 1.63, "step": 6535 }, { "epoch": 2.227675528289025, "grad_norm": 14.881042185130394, "learning_rate": 1.8885348780265933e-06, "loss": 1.5483, "step": 6536 }, { "epoch": 2.2280163599182004, "grad_norm": 19.665293350715118, "learning_rate": 1.8869828000594814e-06, "loss": 1.6998, "step": 6537 }, { "epoch": 2.2283571915473757, "grad_norm": 14.381097723023993, "learning_rate": 1.8854312117823686e-06, "loss": 1.4899, "step": 6538 }, { "epoch": 2.228698023176551, "grad_norm": 19.217314749113985, "learning_rate": 1.8838801134393215e-06, "loss": 1.396, "step": 6539 }, { "epoch": 2.229038854805726, "grad_norm": 15.885545051289213, "learning_rate": 1.8823295052743402e-06, "loss": 1.9355, "step": 6540 }, { "epoch": 2.229379686434901, "grad_norm": 24.510706743564164, "learning_rate": 1.880779387531337e-06, "loss": 1.3089, "step": 6541 }, { "epoch": 2.2297205180640765, "grad_norm": 12.815977624665244, "learning_rate": 1.879229760454151e-06, "loss": 1.679, "step": 6542 }, { "epoch": 2.2300613496932513, "grad_norm": 13.14758397161614, "learning_rate": 1.8776806242865502e-06, "loss": 1.4149, "step": 6543 }, { "epoch": 2.2304021813224266, "grad_norm": 14.047469616234547, "learning_rate": 1.876131979272215e-06, "loss": 1.3565, "step": 6544 }, { "epoch": 2.230743012951602, "grad_norm": 14.068351636930561, "learning_rate": 1.8745838256547549e-06, "loss": 1.3718, "step": 6545 }, { "epoch": 2.2310838445807772, "grad_norm": 17.038843269308057, "learning_rate": 1.8730361636777e-06, "loss": 1.4993, "step": 6546 }, { "epoch": 2.231424676209952, "grad_norm": 13.579036108938267, "learning_rate": 1.8714889935845048e-06, "loss": 1.5445, "step": 6547 }, { "epoch": 2.2317655078391274, "grad_norm": 12.888503285827086, "learning_rate": 1.8699423156185464e-06, "loss": 1.2871, "step": 6548 }, { "epoch": 2.2321063394683027, "grad_norm": 18.128745650884326, "learning_rate": 1.8683961300231184e-06, "loss": 2.1477, "step": 6549 }, { "epoch": 2.232447171097478, "grad_norm": 20.25138243744824, "learning_rate": 1.866850437041447e-06, "loss": 1.5088, "step": 6550 }, { "epoch": 2.232788002726653, "grad_norm": 16.106754373829624, "learning_rate": 1.8653052369166763e-06, "loss": 1.6371, "step": 6551 }, { "epoch": 2.233128834355828, "grad_norm": 15.932182798514813, "learning_rate": 1.8637605298918665e-06, "loss": 1.7898, "step": 6552 }, { "epoch": 2.2334696659850035, "grad_norm": 17.33878609967074, "learning_rate": 1.8622163162100132e-06, "loss": 1.5606, "step": 6553 }, { "epoch": 2.2338104976141784, "grad_norm": 23.91904427278865, "learning_rate": 1.8606725961140226e-06, "loss": 1.1589, "step": 6554 }, { "epoch": 2.2341513292433537, "grad_norm": 15.179722079838037, "learning_rate": 1.8591293698467283e-06, "loss": 1.672, "step": 6555 }, { "epoch": 2.234492160872529, "grad_norm": 14.915975593618938, "learning_rate": 1.85758663765089e-06, "loss": 0.867, "step": 6556 }, { "epoch": 2.2348329925017043, "grad_norm": 25.860759379754995, "learning_rate": 1.8560443997691812e-06, "loss": 1.7142, "step": 6557 }, { "epoch": 2.235173824130879, "grad_norm": 16.98513394376174, "learning_rate": 1.8545026564442043e-06, "loss": 1.6397, "step": 6558 }, { "epoch": 2.2355146557600545, "grad_norm": 21.24468394230733, "learning_rate": 1.8529614079184803e-06, "loss": 1.7228, "step": 6559 }, { "epoch": 2.23585548738923, "grad_norm": 36.407818436233065, "learning_rate": 1.8514206544344548e-06, "loss": 1.3438, "step": 6560 }, { "epoch": 2.236196319018405, "grad_norm": 15.007186064432238, "learning_rate": 1.8498803962344942e-06, "loss": 1.9534, "step": 6561 }, { "epoch": 2.23653715064758, "grad_norm": 26.70383542113246, "learning_rate": 1.8483406335608873e-06, "loss": 1.6615, "step": 6562 }, { "epoch": 2.2368779822767553, "grad_norm": 19.325200850443554, "learning_rate": 1.8468013666558444e-06, "loss": 1.2694, "step": 6563 }, { "epoch": 2.2372188139059306, "grad_norm": 17.02716933084877, "learning_rate": 1.845262595761499e-06, "loss": 1.7997, "step": 6564 }, { "epoch": 2.2375596455351054, "grad_norm": 14.225012667572482, "learning_rate": 1.8437243211199052e-06, "loss": 1.4447, "step": 6565 }, { "epoch": 2.2379004771642808, "grad_norm": 7.047782923070132, "learning_rate": 1.8421865429730401e-06, "loss": 1.1337, "step": 6566 }, { "epoch": 2.238241308793456, "grad_norm": 17.625283390579007, "learning_rate": 1.840649261562803e-06, "loss": 1.9038, "step": 6567 }, { "epoch": 2.2385821404226314, "grad_norm": 13.983312899163542, "learning_rate": 1.839112477131013e-06, "loss": 1.2028, "step": 6568 }, { "epoch": 2.2389229720518062, "grad_norm": 24.86049864507031, "learning_rate": 1.837576189919415e-06, "loss": 1.3069, "step": 6569 }, { "epoch": 2.2392638036809815, "grad_norm": 12.261633560768544, "learning_rate": 1.836040400169668e-06, "loss": 1.4366, "step": 6570 }, { "epoch": 2.239604635310157, "grad_norm": 13.910338472745188, "learning_rate": 1.8345051081233622e-06, "loss": 0.9717, "step": 6571 }, { "epoch": 2.239945466939332, "grad_norm": 13.208122956180627, "learning_rate": 1.8329703140220063e-06, "loss": 0.9903, "step": 6572 }, { "epoch": 2.240286298568507, "grad_norm": 9.928422635037355, "learning_rate": 1.8314360181070234e-06, "loss": 1.3434, "step": 6573 }, { "epoch": 2.2406271301976823, "grad_norm": 19.232732848435422, "learning_rate": 1.829902220619772e-06, "loss": 1.4033, "step": 6574 }, { "epoch": 2.2409679618268576, "grad_norm": 11.01618517275507, "learning_rate": 1.8283689218015189e-06, "loss": 1.5363, "step": 6575 }, { "epoch": 2.2413087934560325, "grad_norm": 21.173436992937308, "learning_rate": 1.8268361218934583e-06, "loss": 1.3145, "step": 6576 }, { "epoch": 2.241649625085208, "grad_norm": 13.033733016733667, "learning_rate": 1.8253038211367102e-06, "loss": 1.4892, "step": 6577 }, { "epoch": 2.241990456714383, "grad_norm": 16.235827901469136, "learning_rate": 1.8237720197723075e-06, "loss": 1.9435, "step": 6578 }, { "epoch": 2.2423312883435584, "grad_norm": 16.28078392080204, "learning_rate": 1.8222407180412098e-06, "loss": 1.4583, "step": 6579 }, { "epoch": 2.2426721199727333, "grad_norm": 16.103211339345968, "learning_rate": 1.8207099161842972e-06, "loss": 1.5292, "step": 6580 }, { "epoch": 2.2430129516019086, "grad_norm": 17.64713186612963, "learning_rate": 1.8191796144423713e-06, "loss": 1.7809, "step": 6581 }, { "epoch": 2.243353783231084, "grad_norm": 14.506197797784152, "learning_rate": 1.8176498130561548e-06, "loss": 1.7332, "step": 6582 }, { "epoch": 2.2436946148602592, "grad_norm": 15.272077379944726, "learning_rate": 1.8161205122662885e-06, "loss": 1.476, "step": 6583 }, { "epoch": 2.244035446489434, "grad_norm": 15.665502285474176, "learning_rate": 1.8145917123133412e-06, "loss": 1.3227, "step": 6584 }, { "epoch": 2.2443762781186094, "grad_norm": 28.239980333793717, "learning_rate": 1.8130634134377994e-06, "loss": 1.5959, "step": 6585 }, { "epoch": 2.2447171097477847, "grad_norm": 14.517057664258646, "learning_rate": 1.8115356158800656e-06, "loss": 1.1364, "step": 6586 }, { "epoch": 2.2450579413769596, "grad_norm": 17.33830381677913, "learning_rate": 1.8100083198804757e-06, "loss": 1.3752, "step": 6587 }, { "epoch": 2.245398773006135, "grad_norm": 15.909895509419824, "learning_rate": 1.8084815256792742e-06, "loss": 1.6855, "step": 6588 }, { "epoch": 2.24573960463531, "grad_norm": 10.783483606770533, "learning_rate": 1.8069552335166318e-06, "loss": 1.1569, "step": 6589 }, { "epoch": 2.2460804362644855, "grad_norm": 14.713672542833457, "learning_rate": 1.8054294436326448e-06, "loss": 1.4026, "step": 6590 }, { "epoch": 2.2464212678936604, "grad_norm": 9.863831494760193, "learning_rate": 1.8039041562673215e-06, "loss": 0.8533, "step": 6591 }, { "epoch": 2.2467620995228357, "grad_norm": 22.227051784671584, "learning_rate": 1.8023793716605975e-06, "loss": 1.7393, "step": 6592 }, { "epoch": 2.247102931152011, "grad_norm": 16.01354109961644, "learning_rate": 1.8008550900523275e-06, "loss": 1.1814, "step": 6593 }, { "epoch": 2.2474437627811863, "grad_norm": 15.882306907548562, "learning_rate": 1.799331311682287e-06, "loss": 1.238, "step": 6594 }, { "epoch": 2.247784594410361, "grad_norm": 20.23711473714515, "learning_rate": 1.7978080367901717e-06, "loss": 2.1513, "step": 6595 }, { "epoch": 2.2481254260395365, "grad_norm": 14.220889626831703, "learning_rate": 1.7962852656156e-06, "loss": 1.8809, "step": 6596 }, { "epoch": 2.2484662576687118, "grad_norm": 14.412125884071777, "learning_rate": 1.794762998398109e-06, "loss": 1.4927, "step": 6597 }, { "epoch": 2.2488070892978866, "grad_norm": 17.06475866533461, "learning_rate": 1.7932412353771577e-06, "loss": 1.7292, "step": 6598 }, { "epoch": 2.249147920927062, "grad_norm": 19.509645583208286, "learning_rate": 1.7917199767921257e-06, "loss": 1.6938, "step": 6599 }, { "epoch": 2.2494887525562373, "grad_norm": 17.719341765589444, "learning_rate": 1.7901992228823123e-06, "loss": 2.2548, "step": 6600 }, { "epoch": 2.2498295841854126, "grad_norm": 14.991332218256371, "learning_rate": 1.7886789738869392e-06, "loss": 1.4401, "step": 6601 }, { "epoch": 2.2501704158145874, "grad_norm": 14.178386495300728, "learning_rate": 1.7871592300451462e-06, "loss": 1.5224, "step": 6602 }, { "epoch": 2.2505112474437627, "grad_norm": 10.95613254841417, "learning_rate": 1.7856399915959982e-06, "loss": 1.0864, "step": 6603 }, { "epoch": 2.250852079072938, "grad_norm": 10.394440360046358, "learning_rate": 1.7841212587784718e-06, "loss": 1.0963, "step": 6604 }, { "epoch": 2.2511929107021134, "grad_norm": 16.544083143953596, "learning_rate": 1.7826030318314746e-06, "loss": 1.4097, "step": 6605 }, { "epoch": 2.2515337423312882, "grad_norm": 10.825776113319959, "learning_rate": 1.7810853109938297e-06, "loss": 1.1794, "step": 6606 }, { "epoch": 2.2518745739604635, "grad_norm": 12.60370604914609, "learning_rate": 1.7795680965042762e-06, "loss": 1.2495, "step": 6607 }, { "epoch": 2.252215405589639, "grad_norm": 14.325452739419799, "learning_rate": 1.7780513886014838e-06, "loss": 1.5262, "step": 6608 }, { "epoch": 2.2525562372188137, "grad_norm": 17.778945256073854, "learning_rate": 1.7765351875240323e-06, "loss": 1.266, "step": 6609 }, { "epoch": 2.252897068847989, "grad_norm": 14.059063508773713, "learning_rate": 1.7750194935104265e-06, "loss": 1.8387, "step": 6610 }, { "epoch": 2.2532379004771643, "grad_norm": 19.49041722117298, "learning_rate": 1.773504306799095e-06, "loss": 1.6624, "step": 6611 }, { "epoch": 2.2535787321063396, "grad_norm": 13.38073921183939, "learning_rate": 1.7719896276283783e-06, "loss": 1.7142, "step": 6612 }, { "epoch": 2.2539195637355145, "grad_norm": 25.388975490755232, "learning_rate": 1.7704754562365434e-06, "loss": 1.4283, "step": 6613 }, { "epoch": 2.25426039536469, "grad_norm": 9.888007882207221, "learning_rate": 1.7689617928617754e-06, "loss": 1.1461, "step": 6614 }, { "epoch": 2.254601226993865, "grad_norm": 17.36124321175439, "learning_rate": 1.767448637742179e-06, "loss": 1.4603, "step": 6615 }, { "epoch": 2.2549420586230404, "grad_norm": 18.59219004309801, "learning_rate": 1.765935991115782e-06, "loss": 1.3044, "step": 6616 }, { "epoch": 2.2552828902522153, "grad_norm": 12.923952579194598, "learning_rate": 1.7644238532205238e-06, "loss": 1.3854, "step": 6617 }, { "epoch": 2.2556237218813906, "grad_norm": 12.670191336817188, "learning_rate": 1.7629122242942753e-06, "loss": 1.0385, "step": 6618 }, { "epoch": 2.255964553510566, "grad_norm": 18.34483319293623, "learning_rate": 1.761401104574822e-06, "loss": 1.9168, "step": 6619 }, { "epoch": 2.2563053851397408, "grad_norm": 15.499594422280586, "learning_rate": 1.7598904942998635e-06, "loss": 1.9945, "step": 6620 }, { "epoch": 2.256646216768916, "grad_norm": 20.29388934460904, "learning_rate": 1.758380393707032e-06, "loss": 1.4382, "step": 6621 }, { "epoch": 2.2569870483980914, "grad_norm": 19.0871657280359, "learning_rate": 1.756870803033867e-06, "loss": 1.4828, "step": 6622 }, { "epoch": 2.2573278800272667, "grad_norm": 11.83987335304635, "learning_rate": 1.7553617225178332e-06, "loss": 1.3436, "step": 6623 }, { "epoch": 2.2576687116564416, "grad_norm": 12.069524963247996, "learning_rate": 1.7538531523963199e-06, "loss": 1.4179, "step": 6624 }, { "epoch": 2.258009543285617, "grad_norm": 19.396392579003372, "learning_rate": 1.752345092906626e-06, "loss": 1.3676, "step": 6625 }, { "epoch": 2.258350374914792, "grad_norm": 13.908115126923699, "learning_rate": 1.750837544285977e-06, "loss": 1.2024, "step": 6626 }, { "epoch": 2.2586912065439675, "grad_norm": 13.093756116760877, "learning_rate": 1.7493305067715166e-06, "loss": 1.0771, "step": 6627 }, { "epoch": 2.2590320381731424, "grad_norm": 31.810091407268335, "learning_rate": 1.7478239806003077e-06, "loss": 1.5335, "step": 6628 }, { "epoch": 2.2593728698023177, "grad_norm": 17.66247879927456, "learning_rate": 1.7463179660093333e-06, "loss": 1.3096, "step": 6629 }, { "epoch": 2.259713701431493, "grad_norm": 13.407469672024108, "learning_rate": 1.7448124632354946e-06, "loss": 1.648, "step": 6630 }, { "epoch": 2.260054533060668, "grad_norm": 18.933582044430942, "learning_rate": 1.7433074725156134e-06, "loss": 1.493, "step": 6631 }, { "epoch": 2.260395364689843, "grad_norm": 20.2546094639788, "learning_rate": 1.741802994086431e-06, "loss": 1.3441, "step": 6632 }, { "epoch": 2.2607361963190185, "grad_norm": 17.738129204316206, "learning_rate": 1.7402990281846082e-06, "loss": 1.4795, "step": 6633 }, { "epoch": 2.2610770279481938, "grad_norm": 18.28411105453398, "learning_rate": 1.738795575046724e-06, "loss": 1.6102, "step": 6634 }, { "epoch": 2.2614178595773686, "grad_norm": 15.90048643971666, "learning_rate": 1.7372926349092778e-06, "loss": 1.187, "step": 6635 }, { "epoch": 2.261758691206544, "grad_norm": 14.730672523376057, "learning_rate": 1.7357902080086887e-06, "loss": 1.727, "step": 6636 }, { "epoch": 2.2620995228357192, "grad_norm": 16.707669545418618, "learning_rate": 1.734288294581295e-06, "loss": 1.5973, "step": 6637 }, { "epoch": 2.2624403544648946, "grad_norm": 12.976342657717828, "learning_rate": 1.7327868948633498e-06, "loss": 1.9282, "step": 6638 }, { "epoch": 2.2627811860940694, "grad_norm": 14.922873273383766, "learning_rate": 1.7312860090910332e-06, "loss": 1.2642, "step": 6639 }, { "epoch": 2.2631220177232447, "grad_norm": 18.322585656307428, "learning_rate": 1.7297856375004417e-06, "loss": 1.7845, "step": 6640 }, { "epoch": 2.26346284935242, "grad_norm": 17.14213471619805, "learning_rate": 1.7282857803275838e-06, "loss": 1.1955, "step": 6641 }, { "epoch": 2.263803680981595, "grad_norm": 15.280505872823365, "learning_rate": 1.7267864378083997e-06, "loss": 1.6735, "step": 6642 }, { "epoch": 2.26414451261077, "grad_norm": 20.159906151953376, "learning_rate": 1.7252876101787375e-06, "loss": 1.2082, "step": 6643 }, { "epoch": 2.2644853442399455, "grad_norm": 17.355334166888507, "learning_rate": 1.7237892976743682e-06, "loss": 1.2618, "step": 6644 }, { "epoch": 2.264826175869121, "grad_norm": 19.509458069037358, "learning_rate": 1.7222915005309877e-06, "loss": 1.3894, "step": 6645 }, { "epoch": 2.2651670074982957, "grad_norm": 15.296974855845528, "learning_rate": 1.7207942189841997e-06, "loss": 1.7524, "step": 6646 }, { "epoch": 2.265507839127471, "grad_norm": 15.330207631276974, "learning_rate": 1.7192974532695356e-06, "loss": 1.7839, "step": 6647 }, { "epoch": 2.2658486707566463, "grad_norm": 17.290479792637278, "learning_rate": 1.7178012036224416e-06, "loss": 1.1248, "step": 6648 }, { "epoch": 2.2661895023858216, "grad_norm": 13.488514316487159, "learning_rate": 1.716305470278284e-06, "loss": 1.0104, "step": 6649 }, { "epoch": 2.2665303340149965, "grad_norm": 33.379068569676114, "learning_rate": 1.7148102534723472e-06, "loss": 1.375, "step": 6650 }, { "epoch": 2.266871165644172, "grad_norm": 12.027128486920105, "learning_rate": 1.7133155534398354e-06, "loss": 1.1939, "step": 6651 }, { "epoch": 2.267211997273347, "grad_norm": 20.085954482362204, "learning_rate": 1.7118213704158704e-06, "loss": 1.758, "step": 6652 }, { "epoch": 2.267552828902522, "grad_norm": 13.018821713303696, "learning_rate": 1.7103277046354954e-06, "loss": 1.3956, "step": 6653 }, { "epoch": 2.2678936605316973, "grad_norm": 17.10777902636449, "learning_rate": 1.708834556333664e-06, "loss": 1.8285, "step": 6654 }, { "epoch": 2.2682344921608726, "grad_norm": 19.25681890852201, "learning_rate": 1.7073419257452605e-06, "loss": 1.5933, "step": 6655 }, { "epoch": 2.268575323790048, "grad_norm": 11.56301599524918, "learning_rate": 1.7058498131050804e-06, "loss": 1.462, "step": 6656 }, { "epoch": 2.2689161554192228, "grad_norm": 14.982223901618772, "learning_rate": 1.7043582186478351e-06, "loss": 1.486, "step": 6657 }, { "epoch": 2.269256987048398, "grad_norm": 11.56712984215238, "learning_rate": 1.7028671426081644e-06, "loss": 1.4272, "step": 6658 }, { "epoch": 2.2695978186775734, "grad_norm": 34.295907249115274, "learning_rate": 1.701376585220616e-06, "loss": 1.8885, "step": 6659 }, { "epoch": 2.2699386503067487, "grad_norm": 17.652741349883513, "learning_rate": 1.6998865467196597e-06, "loss": 1.3708, "step": 6660 }, { "epoch": 2.2702794819359235, "grad_norm": 15.440533496657599, "learning_rate": 1.6983970273396899e-06, "loss": 1.2729, "step": 6661 }, { "epoch": 2.270620313565099, "grad_norm": 11.901069723071158, "learning_rate": 1.696908027315009e-06, "loss": 1.215, "step": 6662 }, { "epoch": 2.270961145194274, "grad_norm": 16.8461188692679, "learning_rate": 1.6954195468798445e-06, "loss": 2.243, "step": 6663 }, { "epoch": 2.271301976823449, "grad_norm": 13.292778116944755, "learning_rate": 1.6939315862683398e-06, "loss": 1.1515, "step": 6664 }, { "epoch": 2.2716428084526243, "grad_norm": 12.616848259758049, "learning_rate": 1.6924441457145575e-06, "loss": 1.6608, "step": 6665 }, { "epoch": 2.2719836400817996, "grad_norm": 14.479716321438318, "learning_rate": 1.6909572254524769e-06, "loss": 1.7715, "step": 6666 }, { "epoch": 2.272324471710975, "grad_norm": 15.442574184630516, "learning_rate": 1.689470825715998e-06, "loss": 1.406, "step": 6667 }, { "epoch": 2.27266530334015, "grad_norm": 15.697665983084818, "learning_rate": 1.687984946738937e-06, "loss": 1.5777, "step": 6668 }, { "epoch": 2.273006134969325, "grad_norm": 12.647653878023837, "learning_rate": 1.6864995887550279e-06, "loss": 1.4677, "step": 6669 }, { "epoch": 2.2733469665985004, "grad_norm": 15.59843716557701, "learning_rate": 1.6850147519979232e-06, "loss": 1.1386, "step": 6670 }, { "epoch": 2.2736877982276757, "grad_norm": 14.42697782532658, "learning_rate": 1.6835304367011967e-06, "loss": 1.8743, "step": 6671 }, { "epoch": 2.2740286298568506, "grad_norm": 11.104829899307529, "learning_rate": 1.6820466430983306e-06, "loss": 0.9995, "step": 6672 }, { "epoch": 2.274369461486026, "grad_norm": 12.512092358326173, "learning_rate": 1.6805633714227377e-06, "loss": 1.4562, "step": 6673 }, { "epoch": 2.2747102931152012, "grad_norm": 16.099335910393112, "learning_rate": 1.6790806219077422e-06, "loss": 1.1228, "step": 6674 }, { "epoch": 2.275051124744376, "grad_norm": 21.8918745121986, "learning_rate": 1.677598394786581e-06, "loss": 1.2609, "step": 6675 }, { "epoch": 2.2753919563735514, "grad_norm": 11.207601408741517, "learning_rate": 1.6761166902924215e-06, "loss": 1.0375, "step": 6676 }, { "epoch": 2.2757327880027267, "grad_norm": 8.926238408198289, "learning_rate": 1.6746355086583361e-06, "loss": 0.8715, "step": 6677 }, { "epoch": 2.276073619631902, "grad_norm": 10.252891477848543, "learning_rate": 1.6731548501173234e-06, "loss": 1.2149, "step": 6678 }, { "epoch": 2.276414451261077, "grad_norm": 13.268835709083957, "learning_rate": 1.6716747149022955e-06, "loss": 1.359, "step": 6679 }, { "epoch": 2.276755282890252, "grad_norm": 16.22521325882433, "learning_rate": 1.670195103246085e-06, "loss": 1.9143, "step": 6680 }, { "epoch": 2.2770961145194275, "grad_norm": 15.420220716224126, "learning_rate": 1.668716015381439e-06, "loss": 1.5475, "step": 6681 }, { "epoch": 2.277436946148603, "grad_norm": 26.415790764445475, "learning_rate": 1.6672374515410255e-06, "loss": 2.1361, "step": 6682 }, { "epoch": 2.2777777777777777, "grad_norm": 16.244785446506842, "learning_rate": 1.6657594119574276e-06, "loss": 1.4185, "step": 6683 }, { "epoch": 2.278118609406953, "grad_norm": 18.315055447400223, "learning_rate": 1.6642818968631474e-06, "loss": 1.5227, "step": 6684 }, { "epoch": 2.2784594410361283, "grad_norm": 16.218923301598036, "learning_rate": 1.6628049064906038e-06, "loss": 1.5621, "step": 6685 }, { "epoch": 2.278800272665303, "grad_norm": 17.978990255624762, "learning_rate": 1.661328441072132e-06, "loss": 1.4604, "step": 6686 }, { "epoch": 2.2791411042944785, "grad_norm": 20.290659411502354, "learning_rate": 1.659852500839989e-06, "loss": 1.356, "step": 6687 }, { "epoch": 2.279481935923654, "grad_norm": 13.260005176860616, "learning_rate": 1.658377086026341e-06, "loss": 1.3363, "step": 6688 }, { "epoch": 2.279822767552829, "grad_norm": 43.39425193294985, "learning_rate": 1.6569021968632808e-06, "loss": 1.3586, "step": 6689 }, { "epoch": 2.280163599182004, "grad_norm": 13.335758018778543, "learning_rate": 1.6554278335828155e-06, "loss": 1.0115, "step": 6690 }, { "epoch": 2.2805044308111793, "grad_norm": 19.138463071820496, "learning_rate": 1.6539539964168621e-06, "loss": 1.7568, "step": 6691 }, { "epoch": 2.2808452624403546, "grad_norm": 11.056214692515844, "learning_rate": 1.6524806855972681e-06, "loss": 1.3107, "step": 6692 }, { "epoch": 2.28118609406953, "grad_norm": 15.739803043005855, "learning_rate": 1.6510079013557868e-06, "loss": 1.277, "step": 6693 }, { "epoch": 2.2815269256987047, "grad_norm": 21.35635990038058, "learning_rate": 1.6495356439240916e-06, "loss": 1.4902, "step": 6694 }, { "epoch": 2.28186775732788, "grad_norm": 11.295348047793405, "learning_rate": 1.6480639135337811e-06, "loss": 1.3739, "step": 6695 }, { "epoch": 2.2822085889570554, "grad_norm": 13.877860193643915, "learning_rate": 1.6465927104163587e-06, "loss": 1.2055, "step": 6696 }, { "epoch": 2.2825494205862302, "grad_norm": 24.619563090479133, "learning_rate": 1.6451220348032515e-06, "loss": 0.9322, "step": 6697 }, { "epoch": 2.2828902522154055, "grad_norm": 14.034511974241067, "learning_rate": 1.6436518869258038e-06, "loss": 1.1877, "step": 6698 }, { "epoch": 2.283231083844581, "grad_norm": 17.104662749577862, "learning_rate": 1.6421822670152748e-06, "loss": 1.1943, "step": 6699 }, { "epoch": 2.283571915473756, "grad_norm": 12.98046030020724, "learning_rate": 1.640713175302842e-06, "loss": 1.4216, "step": 6700 }, { "epoch": 2.283912747102931, "grad_norm": 18.621795132695226, "learning_rate": 1.6392446120195992e-06, "loss": 2.0926, "step": 6701 }, { "epoch": 2.2842535787321063, "grad_norm": 15.643808761429078, "learning_rate": 1.6377765773965576e-06, "loss": 1.3443, "step": 6702 }, { "epoch": 2.2845944103612816, "grad_norm": 21.75848646805825, "learning_rate": 1.6363090716646446e-06, "loss": 1.7728, "step": 6703 }, { "epoch": 2.284935241990457, "grad_norm": 13.740036653053219, "learning_rate": 1.6348420950547045e-06, "loss": 1.2649, "step": 6704 }, { "epoch": 2.285276073619632, "grad_norm": 19.76421990587547, "learning_rate": 1.6333756477975005e-06, "loss": 1.7383, "step": 6705 }, { "epoch": 2.285616905248807, "grad_norm": 8.556965950946745, "learning_rate": 1.6319097301237053e-06, "loss": 1.0391, "step": 6706 }, { "epoch": 2.2859577368779824, "grad_norm": 20.169521973952364, "learning_rate": 1.6304443422639194e-06, "loss": 1.6594, "step": 6707 }, { "epoch": 2.2862985685071573, "grad_norm": 17.50467015880147, "learning_rate": 1.628979484448653e-06, "loss": 1.9248, "step": 6708 }, { "epoch": 2.2866394001363326, "grad_norm": 16.226646018426138, "learning_rate": 1.6275151569083298e-06, "loss": 1.1571, "step": 6709 }, { "epoch": 2.286980231765508, "grad_norm": 10.890602050018646, "learning_rate": 1.6260513598733007e-06, "loss": 1.3413, "step": 6710 }, { "epoch": 2.287321063394683, "grad_norm": 11.506208589985594, "learning_rate": 1.6245880935738218e-06, "loss": 0.9149, "step": 6711 }, { "epoch": 2.287661895023858, "grad_norm": 18.531380972145875, "learning_rate": 1.623125358240073e-06, "loss": 1.6056, "step": 6712 }, { "epoch": 2.2880027266530334, "grad_norm": 16.953052687159012, "learning_rate": 1.6216631541021478e-06, "loss": 1.2174, "step": 6713 }, { "epoch": 2.2883435582822087, "grad_norm": 11.078583096432386, "learning_rate": 1.6202014813900563e-06, "loss": 1.3785, "step": 6714 }, { "epoch": 2.288684389911384, "grad_norm": 15.156781262739901, "learning_rate": 1.6187403403337266e-06, "loss": 1.1882, "step": 6715 }, { "epoch": 2.289025221540559, "grad_norm": 9.172399671324946, "learning_rate": 1.6172797311630011e-06, "loss": 1.2359, "step": 6716 }, { "epoch": 2.289366053169734, "grad_norm": 14.491246737446339, "learning_rate": 1.6158196541076399e-06, "loss": 1.3682, "step": 6717 }, { "epoch": 2.2897068847989095, "grad_norm": 17.297751447598113, "learning_rate": 1.6143601093973188e-06, "loss": 1.5846, "step": 6718 }, { "epoch": 2.2900477164280844, "grad_norm": 15.08607700487343, "learning_rate": 1.61290109726163e-06, "loss": 1.2764, "step": 6719 }, { "epoch": 2.2903885480572597, "grad_norm": 18.83537165812134, "learning_rate": 1.6114426179300812e-06, "loss": 1.6266, "step": 6720 }, { "epoch": 2.290729379686435, "grad_norm": 15.558708202172674, "learning_rate": 1.6099846716320998e-06, "loss": 1.4862, "step": 6721 }, { "epoch": 2.29107021131561, "grad_norm": 15.715240984777392, "learning_rate": 1.6085272585970214e-06, "loss": 1.8158, "step": 6722 }, { "epoch": 2.291411042944785, "grad_norm": 16.909837827683425, "learning_rate": 1.607070379054107e-06, "loss": 1.0237, "step": 6723 }, { "epoch": 2.2917518745739605, "grad_norm": 14.641389965145565, "learning_rate": 1.6056140332325304e-06, "loss": 1.133, "step": 6724 }, { "epoch": 2.2920927062031358, "grad_norm": 11.336944624453302, "learning_rate": 1.6041582213613754e-06, "loss": 0.8207, "step": 6725 }, { "epoch": 2.292433537832311, "grad_norm": 17.98109384359319, "learning_rate": 1.6027029436696533e-06, "loss": 1.4761, "step": 6726 }, { "epoch": 2.292774369461486, "grad_norm": 13.767278833664237, "learning_rate": 1.6012482003862806e-06, "loss": 1.2306, "step": 6727 }, { "epoch": 2.2931152010906612, "grad_norm": 27.88181186676902, "learning_rate": 1.5997939917400934e-06, "loss": 1.7846, "step": 6728 }, { "epoch": 2.2934560327198366, "grad_norm": 14.68877615988441, "learning_rate": 1.5983403179598506e-06, "loss": 1.3233, "step": 6729 }, { "epoch": 2.2937968643490114, "grad_norm": 19.182574325203056, "learning_rate": 1.5968871792742151e-06, "loss": 1.5726, "step": 6730 }, { "epoch": 2.2941376959781867, "grad_norm": 13.594741447136839, "learning_rate": 1.595434575911774e-06, "loss": 1.4803, "step": 6731 }, { "epoch": 2.294478527607362, "grad_norm": 12.609263266448384, "learning_rate": 1.5939825081010262e-06, "loss": 1.5238, "step": 6732 }, { "epoch": 2.294819359236537, "grad_norm": 18.55880494169227, "learning_rate": 1.5925309760703884e-06, "loss": 1.9712, "step": 6733 }, { "epoch": 2.295160190865712, "grad_norm": 26.152546812458528, "learning_rate": 1.591079980048193e-06, "loss": 1.4101, "step": 6734 }, { "epoch": 2.2955010224948875, "grad_norm": 15.197016375398364, "learning_rate": 1.589629520262686e-06, "loss": 1.584, "step": 6735 }, { "epoch": 2.295841854124063, "grad_norm": 22.764280939933794, "learning_rate": 1.5881795969420321e-06, "loss": 1.0934, "step": 6736 }, { "epoch": 2.296182685753238, "grad_norm": 12.709266781976341, "learning_rate": 1.586730210314309e-06, "loss": 0.9672, "step": 6737 }, { "epoch": 2.296523517382413, "grad_norm": 17.70363499991965, "learning_rate": 1.5852813606075113e-06, "loss": 1.6462, "step": 6738 }, { "epoch": 2.2968643490115883, "grad_norm": 17.029598280211598, "learning_rate": 1.5838330480495506e-06, "loss": 1.5977, "step": 6739 }, { "epoch": 2.2972051806407636, "grad_norm": 14.362172763840741, "learning_rate": 1.5823852728682471e-06, "loss": 1.4167, "step": 6740 }, { "epoch": 2.2975460122699385, "grad_norm": 13.102356617283949, "learning_rate": 1.580938035291346e-06, "loss": 1.3382, "step": 6741 }, { "epoch": 2.297886843899114, "grad_norm": 13.961558625289065, "learning_rate": 1.5794913355465047e-06, "loss": 1.2359, "step": 6742 }, { "epoch": 2.298227675528289, "grad_norm": 15.458421689406428, "learning_rate": 1.5780451738612907e-06, "loss": 1.2786, "step": 6743 }, { "epoch": 2.298568507157464, "grad_norm": 13.10221699302413, "learning_rate": 1.576599550463191e-06, "loss": 1.0488, "step": 6744 }, { "epoch": 2.2989093387866393, "grad_norm": 13.143928694482318, "learning_rate": 1.5751544655796137e-06, "loss": 1.3512, "step": 6745 }, { "epoch": 2.2992501704158146, "grad_norm": 18.928704738872668, "learning_rate": 1.5737099194378702e-06, "loss": 1.116, "step": 6746 }, { "epoch": 2.29959100204499, "grad_norm": 14.24044128584685, "learning_rate": 1.5722659122651956e-06, "loss": 1.2011, "step": 6747 }, { "epoch": 2.299931833674165, "grad_norm": 10.557616471691759, "learning_rate": 1.5708224442887387e-06, "loss": 1.2577, "step": 6748 }, { "epoch": 2.30027266530334, "grad_norm": 16.001950385839248, "learning_rate": 1.5693795157355618e-06, "loss": 0.9955, "step": 6749 }, { "epoch": 2.3006134969325154, "grad_norm": 19.909435760417786, "learning_rate": 1.5679371268326432e-06, "loss": 1.6249, "step": 6750 }, { "epoch": 2.3009543285616907, "grad_norm": 13.000418764495407, "learning_rate": 1.566495277806877e-06, "loss": 1.4044, "step": 6751 }, { "epoch": 2.3012951601908656, "grad_norm": 25.75223467673309, "learning_rate": 1.5650539688850719e-06, "loss": 1.4686, "step": 6752 }, { "epoch": 2.301635991820041, "grad_norm": 14.421689608071082, "learning_rate": 1.563613200293951e-06, "loss": 1.5243, "step": 6753 }, { "epoch": 2.301976823449216, "grad_norm": 15.204563206067506, "learning_rate": 1.562172972260153e-06, "loss": 1.0579, "step": 6754 }, { "epoch": 2.302317655078391, "grad_norm": 12.473521589071993, "learning_rate": 1.5607332850102335e-06, "loss": 1.4003, "step": 6755 }, { "epoch": 2.3026584867075663, "grad_norm": 13.19118104319589, "learning_rate": 1.5592941387706562e-06, "loss": 1.1751, "step": 6756 }, { "epoch": 2.3029993183367417, "grad_norm": 12.571881670563181, "learning_rate": 1.5578555337678086e-06, "loss": 1.8917, "step": 6757 }, { "epoch": 2.303340149965917, "grad_norm": 22.57273094090433, "learning_rate": 1.55641747022799e-06, "loss": 1.6363, "step": 6758 }, { "epoch": 2.3036809815950923, "grad_norm": 16.290690123574187, "learning_rate": 1.554979948377408e-06, "loss": 1.7881, "step": 6759 }, { "epoch": 2.304021813224267, "grad_norm": 13.912896648605884, "learning_rate": 1.5535429684421972e-06, "loss": 1.4826, "step": 6760 }, { "epoch": 2.3043626448534424, "grad_norm": 15.339064915848784, "learning_rate": 1.5521065306483952e-06, "loss": 1.5438, "step": 6761 }, { "epoch": 2.3047034764826178, "grad_norm": 13.545603113541175, "learning_rate": 1.5506706352219596e-06, "loss": 1.3037, "step": 6762 }, { "epoch": 2.3050443081117926, "grad_norm": 16.712171561680368, "learning_rate": 1.5492352823887674e-06, "loss": 1.7138, "step": 6763 }, { "epoch": 2.305385139740968, "grad_norm": 14.68446960784123, "learning_rate": 1.5478004723746004e-06, "loss": 1.7869, "step": 6764 }, { "epoch": 2.3057259713701432, "grad_norm": 12.806497219527673, "learning_rate": 1.5463662054051615e-06, "loss": 1.6202, "step": 6765 }, { "epoch": 2.306066802999318, "grad_norm": 15.549733330456567, "learning_rate": 1.5449324817060668e-06, "loss": 1.0624, "step": 6766 }, { "epoch": 2.3064076346284934, "grad_norm": 19.36211847631616, "learning_rate": 1.5434993015028466e-06, "loss": 2.01, "step": 6767 }, { "epoch": 2.3067484662576687, "grad_norm": 12.523392246983564, "learning_rate": 1.5420666650209453e-06, "loss": 1.3782, "step": 6768 }, { "epoch": 2.307089297886844, "grad_norm": 16.163878744747123, "learning_rate": 1.5406345724857236e-06, "loss": 1.8176, "step": 6769 }, { "epoch": 2.3074301295160193, "grad_norm": 12.447565047430416, "learning_rate": 1.5392030241224536e-06, "loss": 1.2012, "step": 6770 }, { "epoch": 2.307770961145194, "grad_norm": 11.324011124473, "learning_rate": 1.537772020156325e-06, "loss": 1.366, "step": 6771 }, { "epoch": 2.3081117927743695, "grad_norm": 12.227371762724612, "learning_rate": 1.5363415608124405e-06, "loss": 1.3692, "step": 6772 }, { "epoch": 2.308452624403545, "grad_norm": 17.598700283900854, "learning_rate": 1.5349116463158158e-06, "loss": 1.2154, "step": 6773 }, { "epoch": 2.3087934560327197, "grad_norm": 11.21271415022091, "learning_rate": 1.5334822768913833e-06, "loss": 1.3843, "step": 6774 }, { "epoch": 2.309134287661895, "grad_norm": 17.25332266413702, "learning_rate": 1.5320534527639873e-06, "loss": 1.0401, "step": 6775 }, { "epoch": 2.3094751192910703, "grad_norm": 13.692125824880344, "learning_rate": 1.5306251741583906e-06, "loss": 0.9937, "step": 6776 }, { "epoch": 2.309815950920245, "grad_norm": 13.800168781399183, "learning_rate": 1.5291974412992633e-06, "loss": 1.1928, "step": 6777 }, { "epoch": 2.3101567825494205, "grad_norm": 29.11774387943874, "learning_rate": 1.5277702544111927e-06, "loss": 1.9215, "step": 6778 }, { "epoch": 2.310497614178596, "grad_norm": 18.52897973682685, "learning_rate": 1.5263436137186872e-06, "loss": 1.2699, "step": 6779 }, { "epoch": 2.310838445807771, "grad_norm": 24.542621840845126, "learning_rate": 1.524917519446157e-06, "loss": 1.8465, "step": 6780 }, { "epoch": 2.311179277436946, "grad_norm": 12.848609245222075, "learning_rate": 1.523491971817934e-06, "loss": 1.1834, "step": 6781 }, { "epoch": 2.3115201090661213, "grad_norm": 11.995451511725484, "learning_rate": 1.5220669710582642e-06, "loss": 1.6044, "step": 6782 }, { "epoch": 2.3118609406952966, "grad_norm": 55.26716984816829, "learning_rate": 1.5206425173913037e-06, "loss": 1.5489, "step": 6783 }, { "epoch": 2.312201772324472, "grad_norm": 18.84281376308303, "learning_rate": 1.519218611041126e-06, "loss": 2.3916, "step": 6784 }, { "epoch": 2.3125426039536467, "grad_norm": 22.526907953532003, "learning_rate": 1.5177952522317173e-06, "loss": 1.6027, "step": 6785 }, { "epoch": 2.312883435582822, "grad_norm": 12.210801080284314, "learning_rate": 1.5163724411869773e-06, "loss": 1.1535, "step": 6786 }, { "epoch": 2.3132242672119974, "grad_norm": 11.03956773306301, "learning_rate": 1.5149501781307196e-06, "loss": 1.1996, "step": 6787 }, { "epoch": 2.3135650988411722, "grad_norm": 12.280068973584374, "learning_rate": 1.513528463286672e-06, "loss": 0.9397, "step": 6788 }, { "epoch": 2.3139059304703475, "grad_norm": 12.184354773024825, "learning_rate": 1.5121072968784772e-06, "loss": 1.4346, "step": 6789 }, { "epoch": 2.314246762099523, "grad_norm": 10.482354269405946, "learning_rate": 1.5106866791296865e-06, "loss": 1.1159, "step": 6790 }, { "epoch": 2.314587593728698, "grad_norm": 14.365329746289467, "learning_rate": 1.5092666102637727e-06, "loss": 1.218, "step": 6791 }, { "epoch": 2.314928425357873, "grad_norm": 15.194421549103048, "learning_rate": 1.5078470905041188e-06, "loss": 1.4854, "step": 6792 }, { "epoch": 2.3152692569870483, "grad_norm": 11.277752994270068, "learning_rate": 1.5064281200740155e-06, "loss": 1.3574, "step": 6793 }, { "epoch": 2.3156100886162236, "grad_norm": 14.793994018875994, "learning_rate": 1.5050096991966788e-06, "loss": 1.542, "step": 6794 }, { "epoch": 2.315950920245399, "grad_norm": 20.977041337046984, "learning_rate": 1.5035918280952282e-06, "loss": 1.4081, "step": 6795 }, { "epoch": 2.316291751874574, "grad_norm": 15.262028527572815, "learning_rate": 1.5021745069927002e-06, "loss": 1.5739, "step": 6796 }, { "epoch": 2.316632583503749, "grad_norm": 20.095456587303214, "learning_rate": 1.5007577361120484e-06, "loss": 1.3203, "step": 6797 }, { "epoch": 2.3169734151329244, "grad_norm": 14.5962745453524, "learning_rate": 1.4993415156761338e-06, "loss": 1.2429, "step": 6798 }, { "epoch": 2.3173142467620993, "grad_norm": 20.0191007897799, "learning_rate": 1.4979258459077334e-06, "loss": 1.3649, "step": 6799 }, { "epoch": 2.3176550783912746, "grad_norm": 13.12857755394873, "learning_rate": 1.4965107270295386e-06, "loss": 1.3127, "step": 6800 }, { "epoch": 2.31799591002045, "grad_norm": 13.012164624762967, "learning_rate": 1.4950961592641534e-06, "loss": 1.1499, "step": 6801 }, { "epoch": 2.318336741649625, "grad_norm": 15.393046291799273, "learning_rate": 1.493682142834094e-06, "loss": 1.6572, "step": 6802 }, { "epoch": 2.3186775732788, "grad_norm": 13.372591011868023, "learning_rate": 1.4922686779617918e-06, "loss": 1.5224, "step": 6803 }, { "epoch": 2.3190184049079754, "grad_norm": 17.757662585223102, "learning_rate": 1.49085576486959e-06, "loss": 1.6141, "step": 6804 }, { "epoch": 2.3193592365371507, "grad_norm": 12.772824393122471, "learning_rate": 1.489443403779745e-06, "loss": 0.8032, "step": 6805 }, { "epoch": 2.319700068166326, "grad_norm": 33.988997575519754, "learning_rate": 1.4880315949144276e-06, "loss": 1.5481, "step": 6806 }, { "epoch": 2.320040899795501, "grad_norm": 14.875636352062903, "learning_rate": 1.4866203384957207e-06, "loss": 1.5062, "step": 6807 }, { "epoch": 2.320381731424676, "grad_norm": 13.390818562628388, "learning_rate": 1.485209634745622e-06, "loss": 1.6192, "step": 6808 }, { "epoch": 2.3207225630538515, "grad_norm": 41.30107680320918, "learning_rate": 1.4837994838860365e-06, "loss": 1.6463, "step": 6809 }, { "epoch": 2.3210633946830264, "grad_norm": 22.068219967284286, "learning_rate": 1.482389886138792e-06, "loss": 1.0337, "step": 6810 }, { "epoch": 2.3214042263122017, "grad_norm": 13.982614672816474, "learning_rate": 1.48098084172562e-06, "loss": 1.8987, "step": 6811 }, { "epoch": 2.321745057941377, "grad_norm": 10.0623864375454, "learning_rate": 1.4795723508681682e-06, "loss": 1.2738, "step": 6812 }, { "epoch": 2.3220858895705523, "grad_norm": 11.38963554332603, "learning_rate": 1.478164413788003e-06, "loss": 1.2018, "step": 6813 }, { "epoch": 2.322426721199727, "grad_norm": 11.930530353265775, "learning_rate": 1.4767570307065938e-06, "loss": 1.3074, "step": 6814 }, { "epoch": 2.3227675528289025, "grad_norm": 23.125528513683182, "learning_rate": 1.475350201845328e-06, "loss": 1.7024, "step": 6815 }, { "epoch": 2.3231083844580778, "grad_norm": 26.238006223764096, "learning_rate": 1.4739439274255068e-06, "loss": 1.8091, "step": 6816 }, { "epoch": 2.323449216087253, "grad_norm": 17.033226606559104, "learning_rate": 1.4725382076683425e-06, "loss": 0.9755, "step": 6817 }, { "epoch": 2.323790047716428, "grad_norm": 13.983427053771837, "learning_rate": 1.4711330427949599e-06, "loss": 1.4239, "step": 6818 }, { "epoch": 2.3241308793456033, "grad_norm": 15.581671620791772, "learning_rate": 1.4697284330263971e-06, "loss": 1.6632, "step": 6819 }, { "epoch": 2.3244717109747786, "grad_norm": 15.045577889819684, "learning_rate": 1.4683243785836054e-06, "loss": 1.2086, "step": 6820 }, { "epoch": 2.3248125426039534, "grad_norm": 17.243412289814625, "learning_rate": 1.4669208796874474e-06, "loss": 1.3519, "step": 6821 }, { "epoch": 2.3251533742331287, "grad_norm": 8.609527442118134, "learning_rate": 1.4655179365586992e-06, "loss": 0.6026, "step": 6822 }, { "epoch": 2.325494205862304, "grad_norm": 15.066509517337296, "learning_rate": 1.4641155494180515e-06, "loss": 1.527, "step": 6823 }, { "epoch": 2.3258350374914794, "grad_norm": 21.777522569912048, "learning_rate": 1.4627137184861001e-06, "loss": 1.4248, "step": 6824 }, { "epoch": 2.326175869120654, "grad_norm": 19.70366987392369, "learning_rate": 1.4613124439833638e-06, "loss": 1.2756, "step": 6825 }, { "epoch": 2.3265167007498295, "grad_norm": 17.246912507807966, "learning_rate": 1.4599117261302681e-06, "loss": 1.3245, "step": 6826 }, { "epoch": 2.326857532379005, "grad_norm": 21.28860945652932, "learning_rate": 1.4585115651471466e-06, "loss": 1.3544, "step": 6827 }, { "epoch": 2.32719836400818, "grad_norm": 15.516567097820449, "learning_rate": 1.4571119612542573e-06, "loss": 1.3145, "step": 6828 }, { "epoch": 2.327539195637355, "grad_norm": 16.59127823934273, "learning_rate": 1.4557129146717585e-06, "loss": 2.0661, "step": 6829 }, { "epoch": 2.3278800272665303, "grad_norm": 18.914424273424576, "learning_rate": 1.4543144256197256e-06, "loss": 1.627, "step": 6830 }, { "epoch": 2.3282208588957056, "grad_norm": 11.169128774583875, "learning_rate": 1.4529164943181507e-06, "loss": 1.1925, "step": 6831 }, { "epoch": 2.3285616905248805, "grad_norm": 20.187675431801246, "learning_rate": 1.4515191209869305e-06, "loss": 1.3517, "step": 6832 }, { "epoch": 2.328902522154056, "grad_norm": 15.001786464576563, "learning_rate": 1.450122305845878e-06, "loss": 1.4915, "step": 6833 }, { "epoch": 2.329243353783231, "grad_norm": 15.276839479291493, "learning_rate": 1.4487260491147176e-06, "loss": 1.9139, "step": 6834 }, { "epoch": 2.3295841854124064, "grad_norm": 11.347145850389566, "learning_rate": 1.447330351013087e-06, "loss": 1.1315, "step": 6835 }, { "epoch": 2.3299250170415813, "grad_norm": 11.696811142221314, "learning_rate": 1.4459352117605347e-06, "loss": 1.4671, "step": 6836 }, { "epoch": 2.3302658486707566, "grad_norm": 15.091661564004434, "learning_rate": 1.444540631576521e-06, "loss": 1.8422, "step": 6837 }, { "epoch": 2.330606680299932, "grad_norm": 15.46144754860721, "learning_rate": 1.4431466106804198e-06, "loss": 1.3101, "step": 6838 }, { "epoch": 2.330947511929107, "grad_norm": 16.2638448486805, "learning_rate": 1.441753149291517e-06, "loss": 1.7912, "step": 6839 }, { "epoch": 2.331288343558282, "grad_norm": 13.842879800782557, "learning_rate": 1.4403602476290057e-06, "loss": 1.7586, "step": 6840 }, { "epoch": 2.3316291751874574, "grad_norm": 19.063205168570175, "learning_rate": 1.4389679059119987e-06, "loss": 1.267, "step": 6841 }, { "epoch": 2.3319700068166327, "grad_norm": 13.108203981589552, "learning_rate": 1.437576124359517e-06, "loss": 1.5203, "step": 6842 }, { "epoch": 2.3323108384458076, "grad_norm": 16.580254721682167, "learning_rate": 1.4361849031904895e-06, "loss": 1.1453, "step": 6843 }, { "epoch": 2.332651670074983, "grad_norm": 7.981124102526594, "learning_rate": 1.4347942426237666e-06, "loss": 0.8505, "step": 6844 }, { "epoch": 2.332992501704158, "grad_norm": 15.907146028807505, "learning_rate": 1.4334041428781003e-06, "loss": 0.8349, "step": 6845 }, { "epoch": 2.3333333333333335, "grad_norm": 22.981256128552914, "learning_rate": 1.4320146041721582e-06, "loss": 1.7752, "step": 6846 }, { "epoch": 2.3336741649625083, "grad_norm": 12.705866619568669, "learning_rate": 1.430625626724525e-06, "loss": 1.4825, "step": 6847 }, { "epoch": 2.3340149965916837, "grad_norm": 27.287052703104372, "learning_rate": 1.4292372107536883e-06, "loss": 1.9613, "step": 6848 }, { "epoch": 2.334355828220859, "grad_norm": 13.575089740991263, "learning_rate": 1.4278493564780531e-06, "loss": 1.648, "step": 6849 }, { "epoch": 2.3346966598500343, "grad_norm": 11.25891730638185, "learning_rate": 1.4264620641159339e-06, "loss": 1.0767, "step": 6850 }, { "epoch": 2.335037491479209, "grad_norm": 21.078499383769493, "learning_rate": 1.4250753338855572e-06, "loss": 1.4757, "step": 6851 }, { "epoch": 2.3353783231083844, "grad_norm": 23.174498772220517, "learning_rate": 1.4236891660050611e-06, "loss": 1.1858, "step": 6852 }, { "epoch": 2.3357191547375598, "grad_norm": 13.833487829167005, "learning_rate": 1.4223035606924961e-06, "loss": 1.1578, "step": 6853 }, { "epoch": 2.3360599863667346, "grad_norm": 10.331434959499516, "learning_rate": 1.420918518165823e-06, "loss": 1.025, "step": 6854 }, { "epoch": 2.33640081799591, "grad_norm": 13.149668915446572, "learning_rate": 1.4195340386429147e-06, "loss": 1.3889, "step": 6855 }, { "epoch": 2.3367416496250852, "grad_norm": 16.515303595578484, "learning_rate": 1.4181501223415545e-06, "loss": 1.4766, "step": 6856 }, { "epoch": 2.3370824812542605, "grad_norm": 12.757883692990825, "learning_rate": 1.416766769479439e-06, "loss": 1.8699, "step": 6857 }, { "epoch": 2.3374233128834354, "grad_norm": 14.480414212771501, "learning_rate": 1.4153839802741748e-06, "loss": 1.1457, "step": 6858 }, { "epoch": 2.3377641445126107, "grad_norm": 14.463197201583133, "learning_rate": 1.41400175494328e-06, "loss": 1.2605, "step": 6859 }, { "epoch": 2.338104976141786, "grad_norm": 21.79265923707415, "learning_rate": 1.4126200937041857e-06, "loss": 1.7573, "step": 6860 }, { "epoch": 2.3384458077709613, "grad_norm": 15.01356567375351, "learning_rate": 1.4112389967742274e-06, "loss": 1.2835, "step": 6861 }, { "epoch": 2.338786639400136, "grad_norm": 16.61836092747709, "learning_rate": 1.409858464370663e-06, "loss": 1.6126, "step": 6862 }, { "epoch": 2.3391274710293115, "grad_norm": 24.894490874315537, "learning_rate": 1.4084784967106556e-06, "loss": 1.6772, "step": 6863 }, { "epoch": 2.339468302658487, "grad_norm": 19.882754407374478, "learning_rate": 1.4070990940112744e-06, "loss": 1.3097, "step": 6864 }, { "epoch": 2.3398091342876617, "grad_norm": 21.459429267608133, "learning_rate": 1.405720256489511e-06, "loss": 1.9409, "step": 6865 }, { "epoch": 2.340149965916837, "grad_norm": 35.071493629777265, "learning_rate": 1.404341984362258e-06, "loss": 1.2986, "step": 6866 }, { "epoch": 2.3404907975460123, "grad_norm": 24.971665664980037, "learning_rate": 1.4029642778463226e-06, "loss": 1.5061, "step": 6867 }, { "epoch": 2.3408316291751876, "grad_norm": 14.22368069076007, "learning_rate": 1.401587137158429e-06, "loss": 1.6439, "step": 6868 }, { "epoch": 2.3411724608043625, "grad_norm": 18.328209287451564, "learning_rate": 1.4002105625152007e-06, "loss": 0.9759, "step": 6869 }, { "epoch": 2.341513292433538, "grad_norm": 11.256594558437866, "learning_rate": 1.398834554133181e-06, "loss": 0.8927, "step": 6870 }, { "epoch": 2.341854124062713, "grad_norm": 17.59205085944068, "learning_rate": 1.3974591122288216e-06, "loss": 1.197, "step": 6871 }, { "epoch": 2.3421949556918884, "grad_norm": 14.984163057042698, "learning_rate": 1.3960842370184852e-06, "loss": 1.3412, "step": 6872 }, { "epoch": 2.3425357873210633, "grad_norm": 16.1130629922975, "learning_rate": 1.3947099287184457e-06, "loss": 1.6555, "step": 6873 }, { "epoch": 2.3428766189502386, "grad_norm": 24.37885468712867, "learning_rate": 1.3933361875448831e-06, "loss": 1.5797, "step": 6874 }, { "epoch": 2.343217450579414, "grad_norm": 16.021953324647395, "learning_rate": 1.3919630137138973e-06, "loss": 1.4541, "step": 6875 }, { "epoch": 2.3435582822085887, "grad_norm": 16.017609606047746, "learning_rate": 1.3905904074414934e-06, "loss": 1.6313, "step": 6876 }, { "epoch": 2.343899113837764, "grad_norm": 14.886763044167857, "learning_rate": 1.389218368943584e-06, "loss": 1.7891, "step": 6877 }, { "epoch": 2.3442399454669394, "grad_norm": 15.01082457642746, "learning_rate": 1.3878468984360017e-06, "loss": 1.7901, "step": 6878 }, { "epoch": 2.3445807770961147, "grad_norm": 19.096149937832507, "learning_rate": 1.3864759961344798e-06, "loss": 1.7732, "step": 6879 }, { "epoch": 2.3449216087252895, "grad_norm": 18.099792545189313, "learning_rate": 1.3851056622546667e-06, "loss": 1.5425, "step": 6880 }, { "epoch": 2.345262440354465, "grad_norm": 12.435393393843423, "learning_rate": 1.3837358970121262e-06, "loss": 1.2139, "step": 6881 }, { "epoch": 2.34560327198364, "grad_norm": 13.861659826171381, "learning_rate": 1.382366700622323e-06, "loss": 1.698, "step": 6882 }, { "epoch": 2.3459441036128155, "grad_norm": 14.530774351046299, "learning_rate": 1.380998073300639e-06, "loss": 1.7631, "step": 6883 }, { "epoch": 2.3462849352419903, "grad_norm": 21.055915541753045, "learning_rate": 1.3796300152623642e-06, "loss": 1.5848, "step": 6884 }, { "epoch": 2.3466257668711656, "grad_norm": 12.896581944035924, "learning_rate": 1.3782625267227002e-06, "loss": 1.2976, "step": 6885 }, { "epoch": 2.346966598500341, "grad_norm": 20.80932865404713, "learning_rate": 1.3768956078967578e-06, "loss": 2.0026, "step": 6886 }, { "epoch": 2.347307430129516, "grad_norm": 14.942914694967834, "learning_rate": 1.3755292589995588e-06, "loss": 1.0096, "step": 6887 }, { "epoch": 2.347648261758691, "grad_norm": 24.530144583947045, "learning_rate": 1.3741634802460357e-06, "loss": 1.312, "step": 6888 }, { "epoch": 2.3479890933878664, "grad_norm": 16.11045777838077, "learning_rate": 1.3727982718510314e-06, "loss": 0.6572, "step": 6889 }, { "epoch": 2.3483299250170417, "grad_norm": 8.90477570617953, "learning_rate": 1.3714336340292971e-06, "loss": 1.0004, "step": 6890 }, { "epoch": 2.3486707566462166, "grad_norm": 14.092983748847075, "learning_rate": 1.3700695669954972e-06, "loss": 1.2074, "step": 6891 }, { "epoch": 2.349011588275392, "grad_norm": 53.44140803960102, "learning_rate": 1.3687060709642042e-06, "loss": 1.5928, "step": 6892 }, { "epoch": 2.3493524199045672, "grad_norm": 11.9097323296817, "learning_rate": 1.3673431461499016e-06, "loss": 1.1095, "step": 6893 }, { "epoch": 2.3496932515337425, "grad_norm": 11.938018284936215, "learning_rate": 1.3659807927669844e-06, "loss": 1.3894, "step": 6894 }, { "epoch": 2.3500340831629174, "grad_norm": 24.153141521958293, "learning_rate": 1.3646190110297515e-06, "loss": 1.9008, "step": 6895 }, { "epoch": 2.3503749147920927, "grad_norm": 17.17008918367805, "learning_rate": 1.363257801152421e-06, "loss": 1.6471, "step": 6896 }, { "epoch": 2.350715746421268, "grad_norm": 14.705491386694023, "learning_rate": 1.3618971633491174e-06, "loss": 1.6144, "step": 6897 }, { "epoch": 2.351056578050443, "grad_norm": 14.231279017050605, "learning_rate": 1.3605370978338694e-06, "loss": 1.7055, "step": 6898 }, { "epoch": 2.351397409679618, "grad_norm": 16.671025614757088, "learning_rate": 1.3591776048206268e-06, "loss": 1.1526, "step": 6899 }, { "epoch": 2.3517382413087935, "grad_norm": 18.972160213212696, "learning_rate": 1.3578186845232389e-06, "loss": 1.108, "step": 6900 }, { "epoch": 2.352079072937969, "grad_norm": 14.416138047836414, "learning_rate": 1.3564603371554685e-06, "loss": 1.4124, "step": 6901 }, { "epoch": 2.3524199045671437, "grad_norm": 14.761643797602227, "learning_rate": 1.355102562930995e-06, "loss": 1.2873, "step": 6902 }, { "epoch": 2.352760736196319, "grad_norm": 11.749790063297029, "learning_rate": 1.3537453620633956e-06, "loss": 1.3165, "step": 6903 }, { "epoch": 2.3531015678254943, "grad_norm": 19.048418609872265, "learning_rate": 1.3523887347661658e-06, "loss": 1.5522, "step": 6904 }, { "epoch": 2.3534423994546696, "grad_norm": 11.492549773683917, "learning_rate": 1.3510326812527086e-06, "loss": 1.5647, "step": 6905 }, { "epoch": 2.3537832310838445, "grad_norm": 19.986125792525126, "learning_rate": 1.3496772017363363e-06, "loss": 1.3087, "step": 6906 }, { "epoch": 2.3541240627130198, "grad_norm": 17.389724514822323, "learning_rate": 1.348322296430273e-06, "loss": 1.2393, "step": 6907 }, { "epoch": 2.354464894342195, "grad_norm": 33.74753881527748, "learning_rate": 1.3469679655476453e-06, "loss": 1.747, "step": 6908 }, { "epoch": 2.35480572597137, "grad_norm": 14.371487733404559, "learning_rate": 1.3456142093015002e-06, "loss": 1.6892, "step": 6909 }, { "epoch": 2.3551465576005453, "grad_norm": 15.23288159062672, "learning_rate": 1.3442610279047885e-06, "loss": 1.577, "step": 6910 }, { "epoch": 2.3554873892297206, "grad_norm": 25.69648389333611, "learning_rate": 1.3429084215703675e-06, "loss": 1.7081, "step": 6911 }, { "epoch": 2.355828220858896, "grad_norm": 13.59330374349659, "learning_rate": 1.3415563905110124e-06, "loss": 0.9884, "step": 6912 }, { "epoch": 2.3561690524880707, "grad_norm": 15.772229693850846, "learning_rate": 1.3402049349393991e-06, "loss": 1.4417, "step": 6913 }, { "epoch": 2.356509884117246, "grad_norm": 20.072034205179303, "learning_rate": 1.3388540550681168e-06, "loss": 2.0185, "step": 6914 }, { "epoch": 2.3568507157464214, "grad_norm": 16.021080381212556, "learning_rate": 1.3375037511096688e-06, "loss": 1.9639, "step": 6915 }, { "epoch": 2.3571915473755967, "grad_norm": 13.234228969885658, "learning_rate": 1.3361540232764586e-06, "loss": 1.5235, "step": 6916 }, { "epoch": 2.3575323790047715, "grad_norm": 15.877523661295369, "learning_rate": 1.3348048717808059e-06, "loss": 1.6906, "step": 6917 }, { "epoch": 2.357873210633947, "grad_norm": 18.596339406322844, "learning_rate": 1.3334562968349368e-06, "loss": 1.0381, "step": 6918 }, { "epoch": 2.358214042263122, "grad_norm": 13.328790137484765, "learning_rate": 1.3321082986509882e-06, "loss": 1.2631, "step": 6919 }, { "epoch": 2.358554873892297, "grad_norm": 14.007035819843269, "learning_rate": 1.3307608774410046e-06, "loss": 0.991, "step": 6920 }, { "epoch": 2.3588957055214723, "grad_norm": 12.071419151327644, "learning_rate": 1.329414033416942e-06, "loss": 0.6723, "step": 6921 }, { "epoch": 2.3592365371506476, "grad_norm": 17.699157078120784, "learning_rate": 1.3280677667906638e-06, "loss": 1.6153, "step": 6922 }, { "epoch": 2.359577368779823, "grad_norm": 15.913191643159758, "learning_rate": 1.3267220777739426e-06, "loss": 2.0157, "step": 6923 }, { "epoch": 2.359918200408998, "grad_norm": 13.194259317807461, "learning_rate": 1.325376966578461e-06, "loss": 1.1622, "step": 6924 }, { "epoch": 2.360259032038173, "grad_norm": 13.70402081110196, "learning_rate": 1.3240324334158106e-06, "loss": 1.136, "step": 6925 }, { "epoch": 2.3605998636673484, "grad_norm": 14.022400916412966, "learning_rate": 1.3226884784974914e-06, "loss": 1.3944, "step": 6926 }, { "epoch": 2.3609406952965237, "grad_norm": 17.392967959771063, "learning_rate": 1.3213451020349133e-06, "loss": 1.573, "step": 6927 }, { "epoch": 2.3612815269256986, "grad_norm": 14.045184073568171, "learning_rate": 1.320002304239396e-06, "loss": 1.1975, "step": 6928 }, { "epoch": 2.361622358554874, "grad_norm": 45.913764625646394, "learning_rate": 1.3186600853221625e-06, "loss": 1.1174, "step": 6929 }, { "epoch": 2.361963190184049, "grad_norm": 19.003968199387394, "learning_rate": 1.3173184454943538e-06, "loss": 2.2929, "step": 6930 }, { "epoch": 2.362304021813224, "grad_norm": 11.844114511921473, "learning_rate": 1.3159773849670144e-06, "loss": 1.1568, "step": 6931 }, { "epoch": 2.3626448534423994, "grad_norm": 12.627343876647407, "learning_rate": 1.3146369039510954e-06, "loss": 1.6683, "step": 6932 }, { "epoch": 2.3629856850715747, "grad_norm": 12.537147552743384, "learning_rate": 1.3132970026574643e-06, "loss": 1.1234, "step": 6933 }, { "epoch": 2.36332651670075, "grad_norm": 14.636367449672935, "learning_rate": 1.3119576812968893e-06, "loss": 1.751, "step": 6934 }, { "epoch": 2.363667348329925, "grad_norm": 13.389112364964891, "learning_rate": 1.3106189400800513e-06, "loss": 1.8461, "step": 6935 }, { "epoch": 2.3640081799591, "grad_norm": 12.84100538058378, "learning_rate": 1.3092807792175428e-06, "loss": 1.4986, "step": 6936 }, { "epoch": 2.3643490115882755, "grad_norm": 20.692470366974813, "learning_rate": 1.3079431989198587e-06, "loss": 1.1694, "step": 6937 }, { "epoch": 2.364689843217451, "grad_norm": 14.212761331634802, "learning_rate": 1.306606199397406e-06, "loss": 1.3339, "step": 6938 }, { "epoch": 2.3650306748466257, "grad_norm": 13.668625130002185, "learning_rate": 1.3052697808605013e-06, "loss": 1.5452, "step": 6939 }, { "epoch": 2.365371506475801, "grad_norm": 21.165116788917064, "learning_rate": 1.3039339435193677e-06, "loss": 1.7968, "step": 6940 }, { "epoch": 2.3657123381049763, "grad_norm": 15.287749170368901, "learning_rate": 1.3025986875841396e-06, "loss": 1.7229, "step": 6941 }, { "epoch": 2.366053169734151, "grad_norm": 12.732317703626853, "learning_rate": 1.3012640132648536e-06, "loss": 1.1455, "step": 6942 }, { "epoch": 2.3663940013633264, "grad_norm": 9.918826392676523, "learning_rate": 1.2999299207714634e-06, "loss": 1.2792, "step": 6943 }, { "epoch": 2.3667348329925018, "grad_norm": 19.882135596921223, "learning_rate": 1.298596410313827e-06, "loss": 1.9929, "step": 6944 }, { "epoch": 2.367075664621677, "grad_norm": 13.967213903222222, "learning_rate": 1.2972634821017072e-06, "loss": 1.477, "step": 6945 }, { "epoch": 2.367416496250852, "grad_norm": 12.471087721652314, "learning_rate": 1.2959311363447842e-06, "loss": 1.4324, "step": 6946 }, { "epoch": 2.3677573278800272, "grad_norm": 17.876704152579418, "learning_rate": 1.294599373252637e-06, "loss": 1.0392, "step": 6947 }, { "epoch": 2.3680981595092025, "grad_norm": 15.482069803795651, "learning_rate": 1.2932681930347568e-06, "loss": 1.4305, "step": 6948 }, { "epoch": 2.368438991138378, "grad_norm": 15.20943249398943, "learning_rate": 1.2919375959005493e-06, "loss": 1.2714, "step": 6949 }, { "epoch": 2.3687798227675527, "grad_norm": 13.114094429128906, "learning_rate": 1.2906075820593166e-06, "loss": 1.9379, "step": 6950 }, { "epoch": 2.369120654396728, "grad_norm": 42.61191701155305, "learning_rate": 1.2892781517202767e-06, "loss": 1.8965, "step": 6951 }, { "epoch": 2.3694614860259033, "grad_norm": 16.42140635537054, "learning_rate": 1.2879493050925578e-06, "loss": 1.6126, "step": 6952 }, { "epoch": 2.369802317655078, "grad_norm": 15.549077429892614, "learning_rate": 1.2866210423851882e-06, "loss": 1.3019, "step": 6953 }, { "epoch": 2.3701431492842535, "grad_norm": 14.236481603428127, "learning_rate": 1.2852933638071114e-06, "loss": 1.6961, "step": 6954 }, { "epoch": 2.370483980913429, "grad_norm": 18.638722865657183, "learning_rate": 1.283966269567175e-06, "loss": 1.6279, "step": 6955 }, { "epoch": 2.370824812542604, "grad_norm": 25.659903455213257, "learning_rate": 1.282639759874138e-06, "loss": 1.6431, "step": 6956 }, { "epoch": 2.371165644171779, "grad_norm": 20.62248242023754, "learning_rate": 1.2813138349366644e-06, "loss": 1.5438, "step": 6957 }, { "epoch": 2.3715064758009543, "grad_norm": 12.303292072548448, "learning_rate": 1.2799884949633274e-06, "loss": 1.2436, "step": 6958 }, { "epoch": 2.3718473074301296, "grad_norm": 18.13752907850377, "learning_rate": 1.278663740162609e-06, "loss": 1.4326, "step": 6959 }, { "epoch": 2.372188139059305, "grad_norm": 13.409820786821584, "learning_rate": 1.277339570742898e-06, "loss": 1.2325, "step": 6960 }, { "epoch": 2.37252897068848, "grad_norm": 13.281682487278548, "learning_rate": 1.2760159869124916e-06, "loss": 1.2503, "step": 6961 }, { "epoch": 2.372869802317655, "grad_norm": 14.568492007209182, "learning_rate": 1.274692988879596e-06, "loss": 1.4656, "step": 6962 }, { "epoch": 2.3732106339468304, "grad_norm": 30.162897538181554, "learning_rate": 1.2733705768523203e-06, "loss": 1.3545, "step": 6963 }, { "epoch": 2.3735514655760053, "grad_norm": 14.305693062838511, "learning_rate": 1.2720487510386893e-06, "loss": 1.6591, "step": 6964 }, { "epoch": 2.3738922972051806, "grad_norm": 11.199114838420437, "learning_rate": 1.2707275116466306e-06, "loss": 1.5526, "step": 6965 }, { "epoch": 2.374233128834356, "grad_norm": 13.649920900142094, "learning_rate": 1.2694068588839775e-06, "loss": 1.3413, "step": 6966 }, { "epoch": 2.374573960463531, "grad_norm": 16.79702901190038, "learning_rate": 1.2680867929584783e-06, "loss": 1.4527, "step": 6967 }, { "epoch": 2.374914792092706, "grad_norm": 24.781500394787756, "learning_rate": 1.2667673140777819e-06, "loss": 0.9448, "step": 6968 }, { "epoch": 2.3752556237218814, "grad_norm": 14.88290357633069, "learning_rate": 1.2654484224494474e-06, "loss": 1.4149, "step": 6969 }, { "epoch": 2.3755964553510567, "grad_norm": 17.641558986949004, "learning_rate": 1.264130118280943e-06, "loss": 1.9408, "step": 6970 }, { "epoch": 2.375937286980232, "grad_norm": 15.102681884510146, "learning_rate": 1.2628124017796424e-06, "loss": 1.692, "step": 6971 }, { "epoch": 2.376278118609407, "grad_norm": 23.66544277253339, "learning_rate": 1.2614952731528274e-06, "loss": 1.0848, "step": 6972 }, { "epoch": 2.376618950238582, "grad_norm": 14.410010122101026, "learning_rate": 1.2601787326076887e-06, "loss": 1.0601, "step": 6973 }, { "epoch": 2.3769597818677575, "grad_norm": 12.567294484094354, "learning_rate": 1.2588627803513226e-06, "loss": 1.6451, "step": 6974 }, { "epoch": 2.3773006134969323, "grad_norm": 13.624149937757027, "learning_rate": 1.2575474165907336e-06, "loss": 1.0157, "step": 6975 }, { "epoch": 2.3776414451261076, "grad_norm": 14.220848256214689, "learning_rate": 1.256232641532834e-06, "loss": 1.2987, "step": 6976 }, { "epoch": 2.377982276755283, "grad_norm": 18.935339730392744, "learning_rate": 1.2549184553844428e-06, "loss": 1.7898, "step": 6977 }, { "epoch": 2.3783231083844583, "grad_norm": 43.79873430644483, "learning_rate": 1.2536048583522886e-06, "loss": 1.3228, "step": 6978 }, { "epoch": 2.378663940013633, "grad_norm": 15.320431104738805, "learning_rate": 1.2522918506430004e-06, "loss": 1.3611, "step": 6979 }, { "epoch": 2.3790047716428084, "grad_norm": 12.97989069748599, "learning_rate": 1.2509794324631247e-06, "loss": 1.115, "step": 6980 }, { "epoch": 2.3793456032719837, "grad_norm": 12.795421545903066, "learning_rate": 1.2496676040191092e-06, "loss": 1.508, "step": 6981 }, { "epoch": 2.379686434901159, "grad_norm": 16.40834559349729, "learning_rate": 1.2483563655173065e-06, "loss": 1.0984, "step": 6982 }, { "epoch": 2.380027266530334, "grad_norm": 14.797290783533818, "learning_rate": 1.2470457171639843e-06, "loss": 1.7167, "step": 6983 }, { "epoch": 2.3803680981595092, "grad_norm": 18.55770344201877, "learning_rate": 1.2457356591653091e-06, "loss": 1.7436, "step": 6984 }, { "epoch": 2.3807089297886845, "grad_norm": 19.524658419273354, "learning_rate": 1.2444261917273588e-06, "loss": 1.1198, "step": 6985 }, { "epoch": 2.3810497614178594, "grad_norm": 16.68527495223767, "learning_rate": 1.2431173150561204e-06, "loss": 1.5301, "step": 6986 }, { "epoch": 2.3813905930470347, "grad_norm": 18.59392890461778, "learning_rate": 1.241809029357483e-06, "loss": 1.1901, "step": 6987 }, { "epoch": 2.38173142467621, "grad_norm": 14.134157098471109, "learning_rate": 1.2405013348372457e-06, "loss": 1.4953, "step": 6988 }, { "epoch": 2.3820722563053853, "grad_norm": 21.302106797698798, "learning_rate": 1.239194231701114e-06, "loss": 1.9773, "step": 6989 }, { "epoch": 2.38241308793456, "grad_norm": 20.25191222623521, "learning_rate": 1.2378877201547002e-06, "loss": 1.8932, "step": 6990 }, { "epoch": 2.3827539195637355, "grad_norm": 31.499990150423645, "learning_rate": 1.2365818004035246e-06, "loss": 1.4572, "step": 6991 }, { "epoch": 2.383094751192911, "grad_norm": 30.788647490476766, "learning_rate": 1.2352764726530131e-06, "loss": 1.4803, "step": 6992 }, { "epoch": 2.383435582822086, "grad_norm": 15.858119375872036, "learning_rate": 1.2339717371084986e-06, "loss": 1.3608, "step": 6993 }, { "epoch": 2.383776414451261, "grad_norm": 13.46585075939964, "learning_rate": 1.232667593975222e-06, "loss": 1.1625, "step": 6994 }, { "epoch": 2.3841172460804363, "grad_norm": 16.078581332487328, "learning_rate": 1.231364043458329e-06, "loss": 1.6507, "step": 6995 }, { "epoch": 2.3844580777096116, "grad_norm": 15.967388421152492, "learning_rate": 1.2300610857628765e-06, "loss": 1.5809, "step": 6996 }, { "epoch": 2.3847989093387865, "grad_norm": 15.843909261561034, "learning_rate": 1.228758721093819e-06, "loss": 1.8243, "step": 6997 }, { "epoch": 2.3851397409679618, "grad_norm": 17.85406237921348, "learning_rate": 1.2274569496560284e-06, "loss": 1.8169, "step": 6998 }, { "epoch": 2.385480572597137, "grad_norm": 16.47686166843419, "learning_rate": 1.2261557716542787e-06, "loss": 1.6329, "step": 6999 }, { "epoch": 2.3858214042263124, "grad_norm": 14.343838635940568, "learning_rate": 1.2248551872932463e-06, "loss": 1.1968, "step": 7000 }, { "epoch": 2.3861622358554873, "grad_norm": 15.49213221433796, "learning_rate": 1.2235551967775239e-06, "loss": 1.4102, "step": 7001 }, { "epoch": 2.3865030674846626, "grad_norm": 13.112898138185567, "learning_rate": 1.2222558003116002e-06, "loss": 1.1654, "step": 7002 }, { "epoch": 2.386843899113838, "grad_norm": 17.310010536255177, "learning_rate": 1.220956998099878e-06, "loss": 1.2332, "step": 7003 }, { "epoch": 2.387184730743013, "grad_norm": 18.22467520859551, "learning_rate": 1.2196587903466634e-06, "loss": 1.5653, "step": 7004 }, { "epoch": 2.387525562372188, "grad_norm": 11.67953496598649, "learning_rate": 1.2183611772561698e-06, "loss": 0.9619, "step": 7005 }, { "epoch": 2.3878663940013634, "grad_norm": 13.77191605584328, "learning_rate": 1.2170641590325166e-06, "loss": 1.6336, "step": 7006 }, { "epoch": 2.3882072256305387, "grad_norm": 12.79057897959964, "learning_rate": 1.2157677358797304e-06, "loss": 1.7458, "step": 7007 }, { "epoch": 2.3885480572597135, "grad_norm": 21.35310057102306, "learning_rate": 1.2144719080017431e-06, "loss": 1.5974, "step": 7008 }, { "epoch": 2.388888888888889, "grad_norm": 17.365410457559342, "learning_rate": 1.2131766756023945e-06, "loss": 1.3649, "step": 7009 }, { "epoch": 2.389229720518064, "grad_norm": 13.578519499943866, "learning_rate": 1.211882038885429e-06, "loss": 1.4866, "step": 7010 }, { "epoch": 2.3895705521472395, "grad_norm": 15.41621458078481, "learning_rate": 1.2105879980544987e-06, "loss": 1.7434, "step": 7011 }, { "epoch": 2.3899113837764143, "grad_norm": 16.195667332040912, "learning_rate": 1.2092945533131623e-06, "loss": 1.1849, "step": 7012 }, { "epoch": 2.3902522154055896, "grad_norm": 19.150970663866296, "learning_rate": 1.2080017048648795e-06, "loss": 1.2507, "step": 7013 }, { "epoch": 2.390593047034765, "grad_norm": 15.195159840159308, "learning_rate": 1.2067094529130253e-06, "loss": 2.0224, "step": 7014 }, { "epoch": 2.3909338786639402, "grad_norm": 14.557186974164281, "learning_rate": 1.205417797660876e-06, "loss": 1.506, "step": 7015 }, { "epoch": 2.391274710293115, "grad_norm": 15.812773772825977, "learning_rate": 1.2041267393116096e-06, "loss": 2.0729, "step": 7016 }, { "epoch": 2.3916155419222904, "grad_norm": 17.10048913519934, "learning_rate": 1.20283627806832e-06, "loss": 1.868, "step": 7017 }, { "epoch": 2.3919563735514657, "grad_norm": 14.78916733663254, "learning_rate": 1.2015464141339984e-06, "loss": 2.0143, "step": 7018 }, { "epoch": 2.3922972051806406, "grad_norm": 13.96069017004883, "learning_rate": 1.2002571477115448e-06, "loss": 1.3359, "step": 7019 }, { "epoch": 2.392638036809816, "grad_norm": 13.63921874886805, "learning_rate": 1.1989684790037714e-06, "loss": 0.9749, "step": 7020 }, { "epoch": 2.392978868438991, "grad_norm": 17.69042882804265, "learning_rate": 1.1976804082133852e-06, "loss": 1.5044, "step": 7021 }, { "epoch": 2.393319700068166, "grad_norm": 17.030141038439524, "learning_rate": 1.196392935543007e-06, "loss": 2.072, "step": 7022 }, { "epoch": 2.3936605316973414, "grad_norm": 15.014342300595306, "learning_rate": 1.1951060611951615e-06, "loss": 1.4243, "step": 7023 }, { "epoch": 2.3940013633265167, "grad_norm": 24.584826393154177, "learning_rate": 1.1938197853722794e-06, "loss": 1.3802, "step": 7024 }, { "epoch": 2.394342194955692, "grad_norm": 14.564857927531339, "learning_rate": 1.192534108276696e-06, "loss": 1.2822, "step": 7025 }, { "epoch": 2.3946830265848673, "grad_norm": 17.230959177496132, "learning_rate": 1.1912490301106537e-06, "loss": 1.6805, "step": 7026 }, { "epoch": 2.395023858214042, "grad_norm": 11.509296208180213, "learning_rate": 1.1899645510763015e-06, "loss": 1.569, "step": 7027 }, { "epoch": 2.3953646898432175, "grad_norm": 19.862169354633593, "learning_rate": 1.1886806713756922e-06, "loss": 1.6138, "step": 7028 }, { "epoch": 2.395705521472393, "grad_norm": 13.801228281538613, "learning_rate": 1.187397391210785e-06, "loss": 1.4264, "step": 7029 }, { "epoch": 2.3960463531015677, "grad_norm": 14.717161038247282, "learning_rate": 1.1861147107834464e-06, "loss": 1.6696, "step": 7030 }, { "epoch": 2.396387184730743, "grad_norm": 12.330478534988172, "learning_rate": 1.1848326302954439e-06, "loss": 1.5595, "step": 7031 }, { "epoch": 2.3967280163599183, "grad_norm": 14.026062989258138, "learning_rate": 1.1835511499484564e-06, "loss": 1.1489, "step": 7032 }, { "epoch": 2.397068847989093, "grad_norm": 15.358377457675845, "learning_rate": 1.1822702699440675e-06, "loss": 1.4529, "step": 7033 }, { "epoch": 2.3974096796182685, "grad_norm": 15.276961939596886, "learning_rate": 1.1809899904837614e-06, "loss": 1.7157, "step": 7034 }, { "epoch": 2.3977505112474438, "grad_norm": 19.645765438564013, "learning_rate": 1.1797103117689318e-06, "loss": 1.556, "step": 7035 }, { "epoch": 2.398091342876619, "grad_norm": 11.088985495144144, "learning_rate": 1.1784312340008786e-06, "loss": 1.0321, "step": 7036 }, { "epoch": 2.3984321745057944, "grad_norm": 14.034685284548413, "learning_rate": 1.1771527573808044e-06, "loss": 1.233, "step": 7037 }, { "epoch": 2.3987730061349692, "grad_norm": 17.875418813063465, "learning_rate": 1.17587488210982e-06, "loss": 1.8297, "step": 7038 }, { "epoch": 2.3991138377641446, "grad_norm": 20.201180391839706, "learning_rate": 1.1745976083889388e-06, "loss": 1.6173, "step": 7039 }, { "epoch": 2.39945466939332, "grad_norm": 12.916569460187533, "learning_rate": 1.1733209364190822e-06, "loss": 1.4033, "step": 7040 }, { "epoch": 2.3997955010224947, "grad_norm": 8.889416545427792, "learning_rate": 1.1720448664010753e-06, "loss": 1.0879, "step": 7041 }, { "epoch": 2.40013633265167, "grad_norm": 20.795211346122944, "learning_rate": 1.1707693985356495e-06, "loss": 1.6864, "step": 7042 }, { "epoch": 2.4004771642808453, "grad_norm": 15.847959829100834, "learning_rate": 1.1694945330234403e-06, "loss": 1.614, "step": 7043 }, { "epoch": 2.40081799591002, "grad_norm": 18.788473198468118, "learning_rate": 1.1682202700649898e-06, "loss": 1.7367, "step": 7044 }, { "epoch": 2.4011588275391955, "grad_norm": 63.12189574126776, "learning_rate": 1.1669466098607434e-06, "loss": 1.7914, "step": 7045 }, { "epoch": 2.401499659168371, "grad_norm": 8.497290437069513, "learning_rate": 1.1656735526110558e-06, "loss": 1.1216, "step": 7046 }, { "epoch": 2.401840490797546, "grad_norm": 15.747156633187076, "learning_rate": 1.1644010985161791e-06, "loss": 1.4531, "step": 7047 }, { "epoch": 2.4021813224267214, "grad_norm": 18.8081522040647, "learning_rate": 1.1631292477762795e-06, "loss": 1.457, "step": 7048 }, { "epoch": 2.4025221540558963, "grad_norm": 15.290144931578189, "learning_rate": 1.1618580005914248e-06, "loss": 1.6402, "step": 7049 }, { "epoch": 2.4028629856850716, "grad_norm": 23.713016634879033, "learning_rate": 1.1605873571615823e-06, "loss": 1.7239, "step": 7050 }, { "epoch": 2.403203817314247, "grad_norm": 17.057990509780307, "learning_rate": 1.1593173176866346e-06, "loss": 1.6484, "step": 7051 }, { "epoch": 2.403544648943422, "grad_norm": 14.38782401749088, "learning_rate": 1.158047882366361e-06, "loss": 1.2655, "step": 7052 }, { "epoch": 2.403885480572597, "grad_norm": 18.56286285929236, "learning_rate": 1.156779051400448e-06, "loss": 1.5789, "step": 7053 }, { "epoch": 2.4042263122017724, "grad_norm": 11.570175888639799, "learning_rate": 1.1555108249884916e-06, "loss": 1.392, "step": 7054 }, { "epoch": 2.4045671438309473, "grad_norm": 12.2242050048677, "learning_rate": 1.154243203329985e-06, "loss": 1.2173, "step": 7055 }, { "epoch": 2.4049079754601226, "grad_norm": 13.579864752454561, "learning_rate": 1.1529761866243317e-06, "loss": 1.6638, "step": 7056 }, { "epoch": 2.405248807089298, "grad_norm": 24.98382267790553, "learning_rate": 1.1517097750708383e-06, "loss": 1.1107, "step": 7057 }, { "epoch": 2.405589638718473, "grad_norm": 9.391527762550352, "learning_rate": 1.1504439688687168e-06, "loss": 1.2071, "step": 7058 }, { "epoch": 2.4059304703476485, "grad_norm": 15.793792111747186, "learning_rate": 1.149178768217083e-06, "loss": 1.3567, "step": 7059 }, { "epoch": 2.4062713019768234, "grad_norm": 15.739614569008056, "learning_rate": 1.1479141733149585e-06, "loss": 1.519, "step": 7060 }, { "epoch": 2.4066121336059987, "grad_norm": 17.589995472551717, "learning_rate": 1.1466501843612681e-06, "loss": 1.4502, "step": 7061 }, { "epoch": 2.406952965235174, "grad_norm": 14.323967917246092, "learning_rate": 1.1453868015548436e-06, "loss": 1.341, "step": 7062 }, { "epoch": 2.407293796864349, "grad_norm": 9.085143381605834, "learning_rate": 1.1441240250944192e-06, "loss": 0.7196, "step": 7063 }, { "epoch": 2.407634628493524, "grad_norm": 14.520647349747339, "learning_rate": 1.1428618551786364e-06, "loss": 1.6496, "step": 7064 }, { "epoch": 2.4079754601226995, "grad_norm": 18.48700234152163, "learning_rate": 1.1416002920060353e-06, "loss": 1.322, "step": 7065 }, { "epoch": 2.4083162917518743, "grad_norm": 22.31584688320049, "learning_rate": 1.1403393357750696e-06, "loss": 1.8924, "step": 7066 }, { "epoch": 2.4086571233810496, "grad_norm": 13.78481037065328, "learning_rate": 1.1390789866840918e-06, "loss": 1.6013, "step": 7067 }, { "epoch": 2.408997955010225, "grad_norm": 11.089031513598648, "learning_rate": 1.1378192449313575e-06, "loss": 1.0885, "step": 7068 }, { "epoch": 2.4093387866394003, "grad_norm": 37.25570369565831, "learning_rate": 1.1365601107150292e-06, "loss": 1.4194, "step": 7069 }, { "epoch": 2.409679618268575, "grad_norm": 13.085105617965022, "learning_rate": 1.1353015842331771e-06, "loss": 1.7585, "step": 7070 }, { "epoch": 2.4100204498977504, "grad_norm": 15.046023251949068, "learning_rate": 1.1340436656837694e-06, "loss": 1.6423, "step": 7071 }, { "epoch": 2.4103612815269257, "grad_norm": 18.62919705442938, "learning_rate": 1.132786355264683e-06, "loss": 1.5826, "step": 7072 }, { "epoch": 2.410702113156101, "grad_norm": 17.350113854187743, "learning_rate": 1.1315296531736968e-06, "loss": 1.8454, "step": 7073 }, { "epoch": 2.411042944785276, "grad_norm": 14.847478515867026, "learning_rate": 1.1302735596084957e-06, "loss": 1.7459, "step": 7074 }, { "epoch": 2.4113837764144512, "grad_norm": 7.786197563691469, "learning_rate": 1.129018074766668e-06, "loss": 0.8365, "step": 7075 }, { "epoch": 2.4117246080436265, "grad_norm": 20.24825254797009, "learning_rate": 1.1277631988457073e-06, "loss": 1.7471, "step": 7076 }, { "epoch": 2.4120654396728014, "grad_norm": 11.225915806106249, "learning_rate": 1.1265089320430089e-06, "loss": 1.4868, "step": 7077 }, { "epoch": 2.4124062713019767, "grad_norm": 15.56525215425584, "learning_rate": 1.125255274555876e-06, "loss": 1.1467, "step": 7078 }, { "epoch": 2.412747102931152, "grad_norm": 17.30941104132712, "learning_rate": 1.124002226581512e-06, "loss": 1.1405, "step": 7079 }, { "epoch": 2.4130879345603273, "grad_norm": 13.910186971757367, "learning_rate": 1.122749788317029e-06, "loss": 1.5828, "step": 7080 }, { "epoch": 2.413428766189502, "grad_norm": 17.125558478055297, "learning_rate": 1.1214979599594362e-06, "loss": 1.3406, "step": 7081 }, { "epoch": 2.4137695978186775, "grad_norm": 14.12667435320065, "learning_rate": 1.1202467417056546e-06, "loss": 1.2574, "step": 7082 }, { "epoch": 2.414110429447853, "grad_norm": 16.022605856091594, "learning_rate": 1.1189961337525069e-06, "loss": 1.6862, "step": 7083 }, { "epoch": 2.414451261077028, "grad_norm": 10.824339665033548, "learning_rate": 1.1177461362967135e-06, "loss": 1.6799, "step": 7084 }, { "epoch": 2.414792092706203, "grad_norm": 15.970867917947709, "learning_rate": 1.11649674953491e-06, "loss": 1.4583, "step": 7085 }, { "epoch": 2.4151329243353783, "grad_norm": 19.938767425699176, "learning_rate": 1.1152479736636262e-06, "loss": 1.7508, "step": 7086 }, { "epoch": 2.4154737559645536, "grad_norm": 17.271671002541062, "learning_rate": 1.1139998088792985e-06, "loss": 1.3655, "step": 7087 }, { "epoch": 2.4158145875937285, "grad_norm": 17.86557953872723, "learning_rate": 1.1127522553782731e-06, "loss": 0.9778, "step": 7088 }, { "epoch": 2.4161554192229038, "grad_norm": 15.401788537597023, "learning_rate": 1.111505313356791e-06, "loss": 1.7464, "step": 7089 }, { "epoch": 2.416496250852079, "grad_norm": 18.805370963452326, "learning_rate": 1.1102589830110023e-06, "loss": 1.619, "step": 7090 }, { "epoch": 2.4168370824812544, "grad_norm": 13.730701263628776, "learning_rate": 1.1090132645369595e-06, "loss": 1.1127, "step": 7091 }, { "epoch": 2.4171779141104293, "grad_norm": 12.13506384702224, "learning_rate": 1.1077681581306194e-06, "loss": 1.5546, "step": 7092 }, { "epoch": 2.4175187457396046, "grad_norm": 10.510919811423076, "learning_rate": 1.1065236639878419e-06, "loss": 1.2782, "step": 7093 }, { "epoch": 2.41785957736878, "grad_norm": 16.059786072933125, "learning_rate": 1.1052797823043914e-06, "loss": 1.9169, "step": 7094 }, { "epoch": 2.418200408997955, "grad_norm": 11.847389105352597, "learning_rate": 1.104036513275935e-06, "loss": 1.0675, "step": 7095 }, { "epoch": 2.41854124062713, "grad_norm": 12.73303147839179, "learning_rate": 1.102793857098044e-06, "loss": 1.3634, "step": 7096 }, { "epoch": 2.4188820722563054, "grad_norm": 16.456801749318732, "learning_rate": 1.1015518139661935e-06, "loss": 1.4058, "step": 7097 }, { "epoch": 2.4192229038854807, "grad_norm": 11.909038669358392, "learning_rate": 1.1003103840757612e-06, "loss": 1.5088, "step": 7098 }, { "epoch": 2.4195637355146555, "grad_norm": 12.759009162211667, "learning_rate": 1.0990695676220303e-06, "loss": 0.9713, "step": 7099 }, { "epoch": 2.419904567143831, "grad_norm": 14.324306080480063, "learning_rate": 1.0978293648001826e-06, "loss": 1.4712, "step": 7100 }, { "epoch": 2.420245398773006, "grad_norm": 13.803605554053629, "learning_rate": 1.0965897758053124e-06, "loss": 1.1397, "step": 7101 }, { "epoch": 2.4205862304021815, "grad_norm": 25.614722978476266, "learning_rate": 1.0953508008324076e-06, "loss": 1.5058, "step": 7102 }, { "epoch": 2.4209270620313563, "grad_norm": 14.634662708050781, "learning_rate": 1.0941124400763637e-06, "loss": 1.3194, "step": 7103 }, { "epoch": 2.4212678936605316, "grad_norm": 11.520007653990081, "learning_rate": 1.0928746937319844e-06, "loss": 1.2202, "step": 7104 }, { "epoch": 2.421608725289707, "grad_norm": 11.622557579274886, "learning_rate": 1.0916375619939678e-06, "loss": 1.6477, "step": 7105 }, { "epoch": 2.4219495569188823, "grad_norm": 13.81427588432943, "learning_rate": 1.0904010450569209e-06, "loss": 1.5044, "step": 7106 }, { "epoch": 2.422290388548057, "grad_norm": 18.115273386028875, "learning_rate": 1.0891651431153527e-06, "loss": 1.1851, "step": 7107 }, { "epoch": 2.4226312201772324, "grad_norm": 17.207037061878033, "learning_rate": 1.0879298563636758e-06, "loss": 1.286, "step": 7108 }, { "epoch": 2.4229720518064077, "grad_norm": 29.793197083970952, "learning_rate": 1.0866951849962054e-06, "loss": 1.7181, "step": 7109 }, { "epoch": 2.4233128834355826, "grad_norm": 12.491347235778273, "learning_rate": 1.085461129207161e-06, "loss": 0.9687, "step": 7110 }, { "epoch": 2.423653715064758, "grad_norm": 16.73720297463059, "learning_rate": 1.0842276891906628e-06, "loss": 1.1401, "step": 7111 }, { "epoch": 2.423994546693933, "grad_norm": 12.66518582946452, "learning_rate": 1.0829948651407374e-06, "loss": 1.6341, "step": 7112 }, { "epoch": 2.4243353783231085, "grad_norm": 15.091405838639584, "learning_rate": 1.081762657251313e-06, "loss": 1.3658, "step": 7113 }, { "epoch": 2.4246762099522834, "grad_norm": 15.341949726275107, "learning_rate": 1.0805310657162206e-06, "loss": 1.3334, "step": 7114 }, { "epoch": 2.4250170415814587, "grad_norm": 10.315824382751103, "learning_rate": 1.0793000907291917e-06, "loss": 1.4225, "step": 7115 }, { "epoch": 2.425357873210634, "grad_norm": 13.301266991679588, "learning_rate": 1.0780697324838674e-06, "loss": 1.2304, "step": 7116 }, { "epoch": 2.4256987048398093, "grad_norm": 13.906059791791161, "learning_rate": 1.0768399911737876e-06, "loss": 1.2848, "step": 7117 }, { "epoch": 2.426039536468984, "grad_norm": 18.619211554307576, "learning_rate": 1.075610866992392e-06, "loss": 1.4381, "step": 7118 }, { "epoch": 2.4263803680981595, "grad_norm": 23.22007309504667, "learning_rate": 1.0743823601330316e-06, "loss": 1.6922, "step": 7119 }, { "epoch": 2.426721199727335, "grad_norm": 12.430442367438534, "learning_rate": 1.0731544707889519e-06, "loss": 1.6499, "step": 7120 }, { "epoch": 2.4270620313565097, "grad_norm": 16.66284595993648, "learning_rate": 1.0719271991533042e-06, "loss": 1.5648, "step": 7121 }, { "epoch": 2.427402862985685, "grad_norm": 23.150045428144303, "learning_rate": 1.0707005454191472e-06, "loss": 1.3762, "step": 7122 }, { "epoch": 2.4277436946148603, "grad_norm": 21.476017038566606, "learning_rate": 1.0694745097794351e-06, "loss": 1.7551, "step": 7123 }, { "epoch": 2.4280845262440356, "grad_norm": 12.667598173731351, "learning_rate": 1.068249092427029e-06, "loss": 1.1269, "step": 7124 }, { "epoch": 2.4284253578732105, "grad_norm": 18.423747588292517, "learning_rate": 1.067024293554692e-06, "loss": 1.1292, "step": 7125 }, { "epoch": 2.4287661895023858, "grad_norm": 22.846097846321975, "learning_rate": 1.0658001133550899e-06, "loss": 1.122, "step": 7126 }, { "epoch": 2.429107021131561, "grad_norm": 18.86687595292585, "learning_rate": 1.0645765520207913e-06, "loss": 1.3992, "step": 7127 }, { "epoch": 2.4294478527607364, "grad_norm": 20.34203407561231, "learning_rate": 1.0633536097442671e-06, "loss": 1.2087, "step": 7128 }, { "epoch": 2.4297886843899112, "grad_norm": 16.39243063289598, "learning_rate": 1.0621312867178913e-06, "loss": 1.3791, "step": 7129 }, { "epoch": 2.4301295160190866, "grad_norm": 21.37298828794129, "learning_rate": 1.0609095831339388e-06, "loss": 1.5701, "step": 7130 }, { "epoch": 2.430470347648262, "grad_norm": 25.069697877037836, "learning_rate": 1.0596884991845902e-06, "loss": 1.8068, "step": 7131 }, { "epoch": 2.4308111792774367, "grad_norm": 19.21318709342924, "learning_rate": 1.0584680350619265e-06, "loss": 1.1256, "step": 7132 }, { "epoch": 2.431152010906612, "grad_norm": 10.760179588178525, "learning_rate": 1.057248190957932e-06, "loss": 1.1747, "step": 7133 }, { "epoch": 2.4314928425357873, "grad_norm": 16.24756980000676, "learning_rate": 1.0560289670644896e-06, "loss": 1.4922, "step": 7134 }, { "epoch": 2.4318336741649627, "grad_norm": 20.911650069422834, "learning_rate": 1.0548103635733937e-06, "loss": 1.4788, "step": 7135 }, { "epoch": 2.4321745057941375, "grad_norm": 11.035724783387087, "learning_rate": 1.0535923806763305e-06, "loss": 1.1321, "step": 7136 }, { "epoch": 2.432515337423313, "grad_norm": 20.64871033623647, "learning_rate": 1.0523750185648946e-06, "loss": 1.3925, "step": 7137 }, { "epoch": 2.432856169052488, "grad_norm": 10.643667266020392, "learning_rate": 1.0511582774305855e-06, "loss": 1.3709, "step": 7138 }, { "epoch": 2.4331970006816634, "grad_norm": 12.26696273258922, "learning_rate": 1.0499421574647977e-06, "loss": 1.2436, "step": 7139 }, { "epoch": 2.4335378323108383, "grad_norm": 12.444754393903482, "learning_rate": 1.048726658858833e-06, "loss": 1.4068, "step": 7140 }, { "epoch": 2.4338786639400136, "grad_norm": 20.011379153717154, "learning_rate": 1.0475117818038933e-06, "loss": 1.1694, "step": 7141 }, { "epoch": 2.434219495569189, "grad_norm": 13.970963287917797, "learning_rate": 1.0462975264910847e-06, "loss": 1.5911, "step": 7142 }, { "epoch": 2.434560327198364, "grad_norm": 13.205531546370915, "learning_rate": 1.0450838931114137e-06, "loss": 1.4693, "step": 7143 }, { "epoch": 2.434901158827539, "grad_norm": 16.45159288316795, "learning_rate": 1.0438708818557902e-06, "loss": 1.2645, "step": 7144 }, { "epoch": 2.4352419904567144, "grad_norm": 12.710235156389514, "learning_rate": 1.042658492915025e-06, "loss": 1.4887, "step": 7145 }, { "epoch": 2.4355828220858897, "grad_norm": 18.07307968072705, "learning_rate": 1.0414467264798327e-06, "loss": 1.2976, "step": 7146 }, { "epoch": 2.4359236537150646, "grad_norm": 14.129865158001456, "learning_rate": 1.0402355827408283e-06, "loss": 1.3888, "step": 7147 }, { "epoch": 2.43626448534424, "grad_norm": 13.081556518413857, "learning_rate": 1.0390250618885307e-06, "loss": 1.5739, "step": 7148 }, { "epoch": 2.436605316973415, "grad_norm": 13.598146721714945, "learning_rate": 1.037815164113356e-06, "loss": 1.3265, "step": 7149 }, { "epoch": 2.4369461486025905, "grad_norm": 20.160380677801303, "learning_rate": 1.0366058896056296e-06, "loss": 1.8446, "step": 7150 }, { "epoch": 2.4372869802317654, "grad_norm": 13.221189023748131, "learning_rate": 1.0353972385555761e-06, "loss": 1.3325, "step": 7151 }, { "epoch": 2.4376278118609407, "grad_norm": 12.192226605175833, "learning_rate": 1.0341892111533165e-06, "loss": 1.3386, "step": 7152 }, { "epoch": 2.437968643490116, "grad_norm": 12.732308628508301, "learning_rate": 1.0329818075888832e-06, "loss": 1.3764, "step": 7153 }, { "epoch": 2.438309475119291, "grad_norm": 25.28563881877948, "learning_rate": 1.031775028052202e-06, "loss": 1.6427, "step": 7154 }, { "epoch": 2.438650306748466, "grad_norm": 20.87275224703967, "learning_rate": 1.0305688727331041e-06, "loss": 0.8538, "step": 7155 }, { "epoch": 2.4389911383776415, "grad_norm": 12.529244170059357, "learning_rate": 1.0293633418213267e-06, "loss": 1.4225, "step": 7156 }, { "epoch": 2.439331970006817, "grad_norm": 13.205856168960803, "learning_rate": 1.0281584355065006e-06, "loss": 1.6709, "step": 7157 }, { "epoch": 2.4396728016359917, "grad_norm": 11.592059313546304, "learning_rate": 1.0269541539781618e-06, "loss": 1.586, "step": 7158 }, { "epoch": 2.440013633265167, "grad_norm": 24.545225792126, "learning_rate": 1.0257504974257538e-06, "loss": 1.4787, "step": 7159 }, { "epoch": 2.4403544648943423, "grad_norm": 21.472539464160924, "learning_rate": 1.0245474660386113e-06, "loss": 1.5991, "step": 7160 }, { "epoch": 2.4406952965235176, "grad_norm": 16.59670312576498, "learning_rate": 1.023345060005978e-06, "loss": 1.6018, "step": 7161 }, { "epoch": 2.4410361281526924, "grad_norm": 17.462023887859964, "learning_rate": 1.0221432795169966e-06, "loss": 1.7785, "step": 7162 }, { "epoch": 2.4413769597818678, "grad_norm": 16.351531910906346, "learning_rate": 1.020942124760712e-06, "loss": 1.3093, "step": 7163 }, { "epoch": 2.441717791411043, "grad_norm": 25.742755621875826, "learning_rate": 1.0197415959260732e-06, "loss": 1.2187, "step": 7164 }, { "epoch": 2.442058623040218, "grad_norm": 22.58556373268965, "learning_rate": 1.018541693201922e-06, "loss": 1.745, "step": 7165 }, { "epoch": 2.4423994546693932, "grad_norm": 15.285258438497669, "learning_rate": 1.0173424167770141e-06, "loss": 1.4242, "step": 7166 }, { "epoch": 2.4427402862985685, "grad_norm": 18.75762409549442, "learning_rate": 1.0161437668399988e-06, "loss": 0.903, "step": 7167 }, { "epoch": 2.443081117927744, "grad_norm": 11.439159631846241, "learning_rate": 1.0149457435794252e-06, "loss": 1.3394, "step": 7168 }, { "epoch": 2.4434219495569187, "grad_norm": 13.052281208399728, "learning_rate": 1.0137483471837529e-06, "loss": 1.5572, "step": 7169 }, { "epoch": 2.443762781186094, "grad_norm": 15.38057135098321, "learning_rate": 1.012551577841332e-06, "loss": 1.9112, "step": 7170 }, { "epoch": 2.4441036128152693, "grad_norm": 17.540768765286426, "learning_rate": 1.0113554357404199e-06, "loss": 1.3663, "step": 7171 }, { "epoch": 2.4444444444444446, "grad_norm": 12.534619906945778, "learning_rate": 1.010159921069178e-06, "loss": 0.883, "step": 7172 }, { "epoch": 2.4447852760736195, "grad_norm": 15.527856441101953, "learning_rate": 1.008965034015662e-06, "loss": 1.8254, "step": 7173 }, { "epoch": 2.445126107702795, "grad_norm": 15.21566996958172, "learning_rate": 1.0077707747678339e-06, "loss": 1.8015, "step": 7174 }, { "epoch": 2.44546693933197, "grad_norm": 17.578904511838907, "learning_rate": 1.0065771435135546e-06, "loss": 1.8197, "step": 7175 }, { "epoch": 2.445807770961145, "grad_norm": 10.8992401969301, "learning_rate": 1.0053841404405878e-06, "loss": 1.1596, "step": 7176 }, { "epoch": 2.4461486025903203, "grad_norm": 13.519823886982053, "learning_rate": 1.0041917657365974e-06, "loss": 1.5636, "step": 7177 }, { "epoch": 2.4464894342194956, "grad_norm": 19.49895569267218, "learning_rate": 1.0030000195891486e-06, "loss": 1.9788, "step": 7178 }, { "epoch": 2.446830265848671, "grad_norm": 15.164066070945205, "learning_rate": 1.001808902185708e-06, "loss": 1.6152, "step": 7179 }, { "epoch": 2.447171097477846, "grad_norm": 18.365426992121996, "learning_rate": 1.0006184137136426e-06, "loss": 1.5637, "step": 7180 }, { "epoch": 2.447511929107021, "grad_norm": 26.74460709948205, "learning_rate": 9.994285543602217e-07, "loss": 1.3699, "step": 7181 }, { "epoch": 2.4478527607361964, "grad_norm": 14.410508492109862, "learning_rate": 9.982393243126142e-07, "loss": 1.5025, "step": 7182 }, { "epoch": 2.4481935923653717, "grad_norm": 14.498190116549527, "learning_rate": 9.970507237578914e-07, "loss": 1.3214, "step": 7183 }, { "epoch": 2.4485344239945466, "grad_norm": 10.78599835856865, "learning_rate": 9.958627528830239e-07, "loss": 1.7572, "step": 7184 }, { "epoch": 2.448875255623722, "grad_norm": 16.72751419940287, "learning_rate": 9.946754118748868e-07, "loss": 1.161, "step": 7185 }, { "epoch": 2.449216087252897, "grad_norm": 16.8445693738929, "learning_rate": 9.934887009202494e-07, "loss": 1.2553, "step": 7186 }, { "epoch": 2.449556918882072, "grad_norm": 11.24946671174485, "learning_rate": 9.92302620205789e-07, "loss": 1.3664, "step": 7187 }, { "epoch": 2.4498977505112474, "grad_norm": 14.818794960105349, "learning_rate": 9.911171699180817e-07, "loss": 1.5927, "step": 7188 }, { "epoch": 2.4502385821404227, "grad_norm": 14.069749257709743, "learning_rate": 9.899323502435993e-07, "loss": 1.7592, "step": 7189 }, { "epoch": 2.450579413769598, "grad_norm": 15.629690629614114, "learning_rate": 9.887481613687245e-07, "loss": 0.8777, "step": 7190 }, { "epoch": 2.450920245398773, "grad_norm": 13.39520631686912, "learning_rate": 9.8756460347973e-07, "loss": 0.9351, "step": 7191 }, { "epoch": 2.451261077027948, "grad_norm": 18.803148215768317, "learning_rate": 9.863816767627943e-07, "loss": 0.8648, "step": 7192 }, { "epoch": 2.4516019086571235, "grad_norm": 14.254662614855748, "learning_rate": 9.851993814040011e-07, "loss": 1.4748, "step": 7193 }, { "epoch": 2.4519427402862988, "grad_norm": 11.363620518404744, "learning_rate": 9.840177175893262e-07, "loss": 1.3806, "step": 7194 }, { "epoch": 2.4522835719154736, "grad_norm": 10.961294660406253, "learning_rate": 9.828366855046507e-07, "loss": 1.3043, "step": 7195 }, { "epoch": 2.452624403544649, "grad_norm": 11.410141045633805, "learning_rate": 9.816562853357564e-07, "loss": 0.9093, "step": 7196 }, { "epoch": 2.4529652351738243, "grad_norm": 13.927484231166975, "learning_rate": 9.80476517268324e-07, "loss": 1.4542, "step": 7197 }, { "epoch": 2.453306066802999, "grad_norm": 15.148515229663053, "learning_rate": 9.792973814879376e-07, "loss": 1.3128, "step": 7198 }, { "epoch": 2.4536468984321744, "grad_norm": 10.960153867689325, "learning_rate": 9.78118878180076e-07, "loss": 1.502, "step": 7199 }, { "epoch": 2.4539877300613497, "grad_norm": 12.964453482058124, "learning_rate": 9.769410075301267e-07, "loss": 1.6452, "step": 7200 }, { "epoch": 2.454328561690525, "grad_norm": 16.182466326357858, "learning_rate": 9.757637697233723e-07, "loss": 1.1855, "step": 7201 }, { "epoch": 2.4546693933197, "grad_norm": 16.187040222625527, "learning_rate": 9.745871649449944e-07, "loss": 1.397, "step": 7202 }, { "epoch": 2.455010224948875, "grad_norm": 19.58717837756131, "learning_rate": 9.734111933800827e-07, "loss": 1.2609, "step": 7203 }, { "epoch": 2.4553510565780505, "grad_norm": 10.168480713735102, "learning_rate": 9.722358552136174e-07, "loss": 1.361, "step": 7204 }, { "epoch": 2.455691888207226, "grad_norm": 11.838139631796846, "learning_rate": 9.710611506304845e-07, "loss": 1.3299, "step": 7205 }, { "epoch": 2.4560327198364007, "grad_norm": 11.677211987502739, "learning_rate": 9.698870798154736e-07, "loss": 0.9799, "step": 7206 }, { "epoch": 2.456373551465576, "grad_norm": 22.201120175248597, "learning_rate": 9.687136429532662e-07, "loss": 1.5797, "step": 7207 }, { "epoch": 2.4567143830947513, "grad_norm": 14.812152062407037, "learning_rate": 9.67540840228451e-07, "loss": 1.4067, "step": 7208 }, { "epoch": 2.457055214723926, "grad_norm": 12.824720784414266, "learning_rate": 9.663686718255138e-07, "loss": 1.3922, "step": 7209 }, { "epoch": 2.4573960463531015, "grad_norm": 18.849793068130023, "learning_rate": 9.65197137928841e-07, "loss": 0.8182, "step": 7210 }, { "epoch": 2.457736877982277, "grad_norm": 14.17527324126507, "learning_rate": 9.640262387227206e-07, "loss": 1.3101, "step": 7211 }, { "epoch": 2.458077709611452, "grad_norm": 13.98788571627043, "learning_rate": 9.62855974391339e-07, "loss": 1.5673, "step": 7212 }, { "epoch": 2.458418541240627, "grad_norm": 17.364585038630988, "learning_rate": 9.616863451187836e-07, "loss": 1.4092, "step": 7213 }, { "epoch": 2.4587593728698023, "grad_norm": 14.277996737852556, "learning_rate": 9.605173510890415e-07, "loss": 1.303, "step": 7214 }, { "epoch": 2.4591002044989776, "grad_norm": 16.870737996666172, "learning_rate": 9.593489924859999e-07, "loss": 1.3883, "step": 7215 }, { "epoch": 2.459441036128153, "grad_norm": 23.57177672096655, "learning_rate": 9.581812694934472e-07, "loss": 1.3017, "step": 7216 }, { "epoch": 2.4597818677573278, "grad_norm": 12.461238986142392, "learning_rate": 9.570141822950708e-07, "loss": 1.5356, "step": 7217 }, { "epoch": 2.460122699386503, "grad_norm": 22.562103751534757, "learning_rate": 9.558477310744568e-07, "loss": 0.9012, "step": 7218 }, { "epoch": 2.4604635310156784, "grad_norm": 18.127004769759765, "learning_rate": 9.546819160150956e-07, "loss": 1.4209, "step": 7219 }, { "epoch": 2.4608043626448532, "grad_norm": 22.8403729083543, "learning_rate": 9.535167373003695e-07, "loss": 1.4932, "step": 7220 }, { "epoch": 2.4611451942740286, "grad_norm": 9.270554983289106, "learning_rate": 9.523521951135701e-07, "loss": 1.3037, "step": 7221 }, { "epoch": 2.461486025903204, "grad_norm": 11.645806817482242, "learning_rate": 9.511882896378844e-07, "loss": 1.4348, "step": 7222 }, { "epoch": 2.461826857532379, "grad_norm": 17.87513809929892, "learning_rate": 9.50025021056396e-07, "loss": 1.3196, "step": 7223 }, { "epoch": 2.462167689161554, "grad_norm": 16.13476683348674, "learning_rate": 9.488623895520959e-07, "loss": 1.7471, "step": 7224 }, { "epoch": 2.4625085207907293, "grad_norm": 15.083576141262654, "learning_rate": 9.477003953078673e-07, "loss": 1.0385, "step": 7225 }, { "epoch": 2.4628493524199047, "grad_norm": 14.368290074912666, "learning_rate": 9.465390385064965e-07, "loss": 1.4466, "step": 7226 }, { "epoch": 2.46319018404908, "grad_norm": 13.444666024993124, "learning_rate": 9.453783193306737e-07, "loss": 1.5229, "step": 7227 }, { "epoch": 2.463531015678255, "grad_norm": 15.667801033702055, "learning_rate": 9.442182379629805e-07, "loss": 1.6101, "step": 7228 }, { "epoch": 2.46387184730743, "grad_norm": 17.875506923700254, "learning_rate": 9.430587945859032e-07, "loss": 1.9103, "step": 7229 }, { "epoch": 2.4642126789366054, "grad_norm": 17.49330484459176, "learning_rate": 9.418999893818276e-07, "loss": 1.3707, "step": 7230 }, { "epoch": 2.4645535105657803, "grad_norm": 17.955943203993076, "learning_rate": 9.407418225330378e-07, "loss": 1.9069, "step": 7231 }, { "epoch": 2.4648943421949556, "grad_norm": 13.636319585377386, "learning_rate": 9.395842942217198e-07, "loss": 1.584, "step": 7232 }, { "epoch": 2.465235173824131, "grad_norm": 16.839059590235827, "learning_rate": 9.38427404629953e-07, "loss": 1.2318, "step": 7233 }, { "epoch": 2.4655760054533062, "grad_norm": 10.330959930581962, "learning_rate": 9.37271153939725e-07, "loss": 1.1717, "step": 7234 }, { "epoch": 2.465916837082481, "grad_norm": 20.433025698107343, "learning_rate": 9.361155423329188e-07, "loss": 1.8687, "step": 7235 }, { "epoch": 2.4662576687116564, "grad_norm": 40.03858249058752, "learning_rate": 9.349605699913128e-07, "loss": 1.2256, "step": 7236 }, { "epoch": 2.4665985003408317, "grad_norm": 18.82611296232626, "learning_rate": 9.338062370965933e-07, "loss": 1.6781, "step": 7237 }, { "epoch": 2.466939331970007, "grad_norm": 13.268400931992273, "learning_rate": 9.32652543830338e-07, "loss": 1.3044, "step": 7238 }, { "epoch": 2.467280163599182, "grad_norm": 15.7931899415614, "learning_rate": 9.314994903740277e-07, "loss": 1.2555, "step": 7239 }, { "epoch": 2.467620995228357, "grad_norm": 13.14380409053015, "learning_rate": 9.303470769090456e-07, "loss": 1.4949, "step": 7240 }, { "epoch": 2.4679618268575325, "grad_norm": 17.12241143231086, "learning_rate": 9.291953036166673e-07, "loss": 1.5554, "step": 7241 }, { "epoch": 2.4683026584867074, "grad_norm": 15.620158303830387, "learning_rate": 9.280441706780729e-07, "loss": 1.4395, "step": 7242 }, { "epoch": 2.4686434901158827, "grad_norm": 15.41240465227015, "learning_rate": 9.268936782743398e-07, "loss": 1.5205, "step": 7243 }, { "epoch": 2.468984321745058, "grad_norm": 18.41532110964911, "learning_rate": 9.257438265864454e-07, "loss": 1.7013, "step": 7244 }, { "epoch": 2.4693251533742333, "grad_norm": 12.144089250184471, "learning_rate": 9.245946157952656e-07, "loss": 1.4905, "step": 7245 }, { "epoch": 2.469665985003408, "grad_norm": 15.658000063337223, "learning_rate": 9.234460460815763e-07, "loss": 1.5081, "step": 7246 }, { "epoch": 2.4700068166325835, "grad_norm": 12.59686655548456, "learning_rate": 9.222981176260515e-07, "loss": 1.6243, "step": 7247 }, { "epoch": 2.470347648261759, "grad_norm": 16.043615750645948, "learning_rate": 9.211508306092654e-07, "loss": 1.9371, "step": 7248 }, { "epoch": 2.470688479890934, "grad_norm": 14.9662544793883, "learning_rate": 9.200041852116903e-07, "loss": 1.414, "step": 7249 }, { "epoch": 2.471029311520109, "grad_norm": 15.597322042713373, "learning_rate": 9.188581816136988e-07, "loss": 1.8718, "step": 7250 }, { "epoch": 2.4713701431492843, "grad_norm": 14.982816326494522, "learning_rate": 9.17712819995561e-07, "loss": 1.8928, "step": 7251 }, { "epoch": 2.4717109747784596, "grad_norm": 14.963856948213275, "learning_rate": 9.165681005374477e-07, "loss": 1.504, "step": 7252 }, { "epoch": 2.4720518064076344, "grad_norm": 13.693706777317677, "learning_rate": 9.154240234194283e-07, "loss": 1.5746, "step": 7253 }, { "epoch": 2.4723926380368098, "grad_norm": 27.363992450065297, "learning_rate": 9.142805888214673e-07, "loss": 1.2542, "step": 7254 }, { "epoch": 2.472733469665985, "grad_norm": 19.697659165278434, "learning_rate": 9.131377969234351e-07, "loss": 1.3685, "step": 7255 }, { "epoch": 2.4730743012951604, "grad_norm": 19.579023001738474, "learning_rate": 9.119956479050979e-07, "loss": 1.7573, "step": 7256 }, { "epoch": 2.4734151329243352, "grad_norm": 12.931122520869199, "learning_rate": 9.108541419461159e-07, "loss": 1.5519, "step": 7257 }, { "epoch": 2.4737559645535105, "grad_norm": 15.47420862218098, "learning_rate": 9.097132792260582e-07, "loss": 1.2184, "step": 7258 }, { "epoch": 2.474096796182686, "grad_norm": 14.74881546475577, "learning_rate": 9.085730599243825e-07, "loss": 1.403, "step": 7259 }, { "epoch": 2.474437627811861, "grad_norm": 17.484296609969448, "learning_rate": 9.074334842204502e-07, "loss": 1.4673, "step": 7260 }, { "epoch": 2.474778459441036, "grad_norm": 10.078901244125214, "learning_rate": 9.062945522935251e-07, "loss": 0.8473, "step": 7261 }, { "epoch": 2.4751192910702113, "grad_norm": 16.249880127589734, "learning_rate": 9.051562643227618e-07, "loss": 1.4238, "step": 7262 }, { "epoch": 2.4754601226993866, "grad_norm": 23.8161549515034, "learning_rate": 9.040186204872187e-07, "loss": 1.0332, "step": 7263 }, { "epoch": 2.4758009543285615, "grad_norm": 13.237333071598837, "learning_rate": 9.02881620965852e-07, "loss": 0.7131, "step": 7264 }, { "epoch": 2.476141785957737, "grad_norm": 11.337234602730579, "learning_rate": 9.017452659375158e-07, "loss": 1.0034, "step": 7265 }, { "epoch": 2.476482617586912, "grad_norm": 17.4055310616967, "learning_rate": 9.006095555809646e-07, "loss": 1.6727, "step": 7266 }, { "epoch": 2.4768234492160874, "grad_norm": 14.005628033926971, "learning_rate": 8.994744900748464e-07, "loss": 1.6518, "step": 7267 }, { "epoch": 2.4771642808452623, "grad_norm": 12.918073777405686, "learning_rate": 8.983400695977157e-07, "loss": 1.9534, "step": 7268 }, { "epoch": 2.4775051124744376, "grad_norm": 11.671468300043681, "learning_rate": 8.972062943280213e-07, "loss": 1.5628, "step": 7269 }, { "epoch": 2.477845944103613, "grad_norm": 21.773698671504246, "learning_rate": 8.960731644441062e-07, "loss": 1.9202, "step": 7270 }, { "epoch": 2.4781867757327882, "grad_norm": 12.016252813064868, "learning_rate": 8.949406801242216e-07, "loss": 1.536, "step": 7271 }, { "epoch": 2.478527607361963, "grad_norm": 18.603103597652378, "learning_rate": 8.938088415465085e-07, "loss": 1.3815, "step": 7272 }, { "epoch": 2.4788684389911384, "grad_norm": 14.937071268401132, "learning_rate": 8.926776488890088e-07, "loss": 1.4647, "step": 7273 }, { "epoch": 2.4792092706203137, "grad_norm": 13.748867576204576, "learning_rate": 8.915471023296679e-07, "loss": 1.2132, "step": 7274 }, { "epoch": 2.4795501022494886, "grad_norm": 12.158054227426947, "learning_rate": 8.904172020463209e-07, "loss": 1.3479, "step": 7275 }, { "epoch": 2.479890933878664, "grad_norm": 10.940859701880347, "learning_rate": 8.892879482167066e-07, "loss": 1.5001, "step": 7276 }, { "epoch": 2.480231765507839, "grad_norm": 13.391777854471858, "learning_rate": 8.881593410184642e-07, "loss": 1.6364, "step": 7277 }, { "epoch": 2.4805725971370145, "grad_norm": 14.97460204398591, "learning_rate": 8.870313806291248e-07, "loss": 1.6099, "step": 7278 }, { "epoch": 2.4809134287661894, "grad_norm": 14.648752701461312, "learning_rate": 8.859040672261216e-07, "loss": 1.7127, "step": 7279 }, { "epoch": 2.4812542603953647, "grad_norm": 17.185113957049612, "learning_rate": 8.84777400986786e-07, "loss": 2.1919, "step": 7280 }, { "epoch": 2.48159509202454, "grad_norm": 21.035684668432104, "learning_rate": 8.836513820883468e-07, "loss": 1.7349, "step": 7281 }, { "epoch": 2.4819359236537153, "grad_norm": 19.291868187192378, "learning_rate": 8.825260107079314e-07, "loss": 1.673, "step": 7282 }, { "epoch": 2.48227675528289, "grad_norm": 17.969087104437563, "learning_rate": 8.814012870225652e-07, "loss": 1.5835, "step": 7283 }, { "epoch": 2.4826175869120655, "grad_norm": 11.518077340679586, "learning_rate": 8.802772112091723e-07, "loss": 0.9113, "step": 7284 }, { "epoch": 2.4829584185412408, "grad_norm": 14.937137887495476, "learning_rate": 8.791537834445734e-07, "loss": 1.1796, "step": 7285 }, { "epoch": 2.4832992501704156, "grad_norm": 13.05421455830903, "learning_rate": 8.780310039054884e-07, "loss": 1.6152, "step": 7286 }, { "epoch": 2.483640081799591, "grad_norm": 17.3514101966365, "learning_rate": 8.769088727685365e-07, "loss": 1.1963, "step": 7287 }, { "epoch": 2.4839809134287663, "grad_norm": 18.493868641136718, "learning_rate": 8.757873902102293e-07, "loss": 1.6259, "step": 7288 }, { "epoch": 2.4843217450579416, "grad_norm": 35.77229918087143, "learning_rate": 8.746665564069845e-07, "loss": 1.6115, "step": 7289 }, { "epoch": 2.4846625766871164, "grad_norm": 17.232779489242258, "learning_rate": 8.735463715351139e-07, "loss": 1.6328, "step": 7290 }, { "epoch": 2.4850034083162917, "grad_norm": 18.611301167372854, "learning_rate": 8.724268357708227e-07, "loss": 1.8876, "step": 7291 }, { "epoch": 2.485344239945467, "grad_norm": 14.739756991505775, "learning_rate": 8.713079492902237e-07, "loss": 1.1354, "step": 7292 }, { "epoch": 2.4856850715746424, "grad_norm": 14.518684781049565, "learning_rate": 8.701897122693187e-07, "loss": 1.5207, "step": 7293 }, { "epoch": 2.4860259032038172, "grad_norm": 12.103499737354694, "learning_rate": 8.690721248840117e-07, "loss": 1.5234, "step": 7294 }, { "epoch": 2.4863667348329925, "grad_norm": 13.28495703757433, "learning_rate": 8.679551873101033e-07, "loss": 1.5038, "step": 7295 }, { "epoch": 2.486707566462168, "grad_norm": 23.62572993506726, "learning_rate": 8.66838899723293e-07, "loss": 1.2484, "step": 7296 }, { "epoch": 2.4870483980913427, "grad_norm": 12.882316914884425, "learning_rate": 8.657232622991768e-07, "loss": 1.6313, "step": 7297 }, { "epoch": 2.487389229720518, "grad_norm": 14.612987567502403, "learning_rate": 8.64608275213249e-07, "loss": 1.2636, "step": 7298 }, { "epoch": 2.4877300613496933, "grad_norm": 17.744060436582874, "learning_rate": 8.634939386409019e-07, "loss": 2.0273, "step": 7299 }, { "epoch": 2.4880708929788686, "grad_norm": 16.36339956859085, "learning_rate": 8.623802527574243e-07, "loss": 1.2677, "step": 7300 }, { "epoch": 2.4884117246080435, "grad_norm": 23.123910063041055, "learning_rate": 8.612672177380038e-07, "loss": 1.2786, "step": 7301 }, { "epoch": 2.488752556237219, "grad_norm": 19.265320379996886, "learning_rate": 8.601548337577259e-07, "loss": 1.8027, "step": 7302 }, { "epoch": 2.489093387866394, "grad_norm": 16.374129960695843, "learning_rate": 8.590431009915734e-07, "loss": 1.6699, "step": 7303 }, { "epoch": 2.4894342194955694, "grad_norm": 12.830818275697034, "learning_rate": 8.579320196144225e-07, "loss": 1.0333, "step": 7304 }, { "epoch": 2.4897750511247443, "grad_norm": 19.649175584007732, "learning_rate": 8.568215898010551e-07, "loss": 1.9452, "step": 7305 }, { "epoch": 2.4901158827539196, "grad_norm": 15.097972362608665, "learning_rate": 8.55711811726146e-07, "loss": 1.4861, "step": 7306 }, { "epoch": 2.490456714383095, "grad_norm": 14.40721914423309, "learning_rate": 8.546026855642631e-07, "loss": 1.4387, "step": 7307 }, { "epoch": 2.4907975460122698, "grad_norm": 18.818559534259357, "learning_rate": 8.534942114898831e-07, "loss": 1.6418, "step": 7308 }, { "epoch": 2.491138377641445, "grad_norm": 15.355395683992542, "learning_rate": 8.523863896773682e-07, "loss": 1.0962, "step": 7309 }, { "epoch": 2.4914792092706204, "grad_norm": 13.672089660936196, "learning_rate": 8.512792203009834e-07, "loss": 1.44, "step": 7310 }, { "epoch": 2.4918200408997953, "grad_norm": 14.639548549627127, "learning_rate": 8.501727035348956e-07, "loss": 1.1745, "step": 7311 }, { "epoch": 2.4921608725289706, "grad_norm": 11.987301842415631, "learning_rate": 8.490668395531592e-07, "loss": 1.0465, "step": 7312 }, { "epoch": 2.492501704158146, "grad_norm": 16.509511061867904, "learning_rate": 8.479616285297331e-07, "loss": 1.4955, "step": 7313 }, { "epoch": 2.492842535787321, "grad_norm": 13.789730819556569, "learning_rate": 8.46857070638471e-07, "loss": 1.327, "step": 7314 }, { "epoch": 2.4931833674164965, "grad_norm": 25.47945493986472, "learning_rate": 8.457531660531243e-07, "loss": 1.17, "step": 7315 }, { "epoch": 2.4935241990456714, "grad_norm": 12.086911735336566, "learning_rate": 8.446499149473419e-07, "loss": 1.3855, "step": 7316 }, { "epoch": 2.4938650306748467, "grad_norm": 14.750849672338111, "learning_rate": 8.435473174946695e-07, "loss": 1.5196, "step": 7317 }, { "epoch": 2.494205862304022, "grad_norm": 15.129561697219751, "learning_rate": 8.424453738685495e-07, "loss": 1.8154, "step": 7318 }, { "epoch": 2.494546693933197, "grad_norm": 16.576557855776038, "learning_rate": 8.413440842423226e-07, "loss": 1.2594, "step": 7319 }, { "epoch": 2.494887525562372, "grad_norm": 17.581886249802572, "learning_rate": 8.402434487892264e-07, "loss": 1.3065, "step": 7320 }, { "epoch": 2.4952283571915475, "grad_norm": 16.815338922423148, "learning_rate": 8.391434676823951e-07, "loss": 1.7271, "step": 7321 }, { "epoch": 2.4955691888207223, "grad_norm": 16.600701314810742, "learning_rate": 8.38044141094858e-07, "loss": 1.6915, "step": 7322 }, { "epoch": 2.4959100204498976, "grad_norm": 11.988104697693768, "learning_rate": 8.36945469199546e-07, "loss": 0.9004, "step": 7323 }, { "epoch": 2.496250852079073, "grad_norm": 10.399512918248465, "learning_rate": 8.358474521692855e-07, "loss": 0.6607, "step": 7324 }, { "epoch": 2.4965916837082482, "grad_norm": 19.616397109801234, "learning_rate": 8.347500901767947e-07, "loss": 2.0397, "step": 7325 }, { "epoch": 2.4969325153374236, "grad_norm": 16.053113989114443, "learning_rate": 8.33653383394698e-07, "loss": 1.5719, "step": 7326 }, { "epoch": 2.4972733469665984, "grad_norm": 14.887494043534236, "learning_rate": 8.325573319955083e-07, "loss": 1.4806, "step": 7327 }, { "epoch": 2.4976141785957737, "grad_norm": 19.450787495956874, "learning_rate": 8.314619361516396e-07, "loss": 1.5588, "step": 7328 }, { "epoch": 2.497955010224949, "grad_norm": 10.859380862098183, "learning_rate": 8.303671960354026e-07, "loss": 0.7832, "step": 7329 }, { "epoch": 2.498295841854124, "grad_norm": 16.284460310556423, "learning_rate": 8.292731118190039e-07, "loss": 1.767, "step": 7330 }, { "epoch": 2.498636673483299, "grad_norm": 13.651347310241343, "learning_rate": 8.281796836745476e-07, "loss": 1.5013, "step": 7331 }, { "epoch": 2.4989775051124745, "grad_norm": 13.557289607670956, "learning_rate": 8.270869117740343e-07, "loss": 1.2004, "step": 7332 }, { "epoch": 2.4993183367416494, "grad_norm": 18.826562863831565, "learning_rate": 8.259947962893605e-07, "loss": 1.1302, "step": 7333 }, { "epoch": 2.4996591683708247, "grad_norm": 22.89463103124782, "learning_rate": 8.249033373923215e-07, "loss": 1.6083, "step": 7334 }, { "epoch": 2.5, "grad_norm": 21.10486774315084, "learning_rate": 8.238125352546078e-07, "loss": 1.4667, "step": 7335 }, { "epoch": 2.5003408316291753, "grad_norm": 8.977924145551727, "learning_rate": 8.22722390047806e-07, "loss": 1.0005, "step": 7336 }, { "epoch": 2.5006816632583506, "grad_norm": 13.69659112590352, "learning_rate": 8.216329019434028e-07, "loss": 1.9537, "step": 7337 }, { "epoch": 2.5010224948875255, "grad_norm": 10.178160168628994, "learning_rate": 8.205440711127749e-07, "loss": 0.7666, "step": 7338 }, { "epoch": 2.501363326516701, "grad_norm": 13.792566914346132, "learning_rate": 8.194558977272027e-07, "loss": 1.6983, "step": 7339 }, { "epoch": 2.501704158145876, "grad_norm": 14.99074874198622, "learning_rate": 8.183683819578614e-07, "loss": 1.3055, "step": 7340 }, { "epoch": 2.502044989775051, "grad_norm": 8.28071515922257, "learning_rate": 8.17281523975817e-07, "loss": 0.8247, "step": 7341 }, { "epoch": 2.5023858214042263, "grad_norm": 14.880819963916805, "learning_rate": 8.16195323952042e-07, "loss": 1.4722, "step": 7342 }, { "epoch": 2.5027266530334016, "grad_norm": 15.455770904442437, "learning_rate": 8.151097820573955e-07, "loss": 1.5712, "step": 7343 }, { "epoch": 2.5030674846625764, "grad_norm": 19.713317092032014, "learning_rate": 8.140248984626387e-07, "loss": 1.6699, "step": 7344 }, { "epoch": 2.5034083162917518, "grad_norm": 15.659107460063895, "learning_rate": 8.12940673338431e-07, "loss": 1.9309, "step": 7345 }, { "epoch": 2.503749147920927, "grad_norm": 17.783972067668234, "learning_rate": 8.118571068553222e-07, "loss": 1.9212, "step": 7346 }, { "epoch": 2.5040899795501024, "grad_norm": 14.48541665206304, "learning_rate": 8.10774199183762e-07, "loss": 1.6459, "step": 7347 }, { "epoch": 2.5044308111792777, "grad_norm": 17.664023263507936, "learning_rate": 8.09691950494097e-07, "loss": 1.5613, "step": 7348 }, { "epoch": 2.5047716428084525, "grad_norm": 21.373022931698106, "learning_rate": 8.086103609565688e-07, "loss": 1.4619, "step": 7349 }, { "epoch": 2.505112474437628, "grad_norm": 19.584549353159748, "learning_rate": 8.075294307413156e-07, "loss": 1.4934, "step": 7350 }, { "epoch": 2.505453306066803, "grad_norm": 13.232044562144287, "learning_rate": 8.064491600183722e-07, "loss": 1.685, "step": 7351 }, { "epoch": 2.505794137695978, "grad_norm": 11.054763609237785, "learning_rate": 8.053695489576697e-07, "loss": 0.7657, "step": 7352 }, { "epoch": 2.5061349693251533, "grad_norm": 20.24879136025583, "learning_rate": 8.04290597729035e-07, "loss": 1.589, "step": 7353 }, { "epoch": 2.5064758009543286, "grad_norm": 12.049023010676693, "learning_rate": 8.032123065021913e-07, "loss": 1.045, "step": 7354 }, { "epoch": 2.5068166325835035, "grad_norm": 14.375461201391476, "learning_rate": 8.021346754467596e-07, "loss": 2.1813, "step": 7355 }, { "epoch": 2.507157464212679, "grad_norm": 14.998850711403684, "learning_rate": 8.010577047322515e-07, "loss": 1.3718, "step": 7356 }, { "epoch": 2.507498295841854, "grad_norm": 15.34967352374433, "learning_rate": 7.999813945280826e-07, "loss": 1.8656, "step": 7357 }, { "epoch": 2.5078391274710294, "grad_norm": 13.602744070199778, "learning_rate": 7.989057450035614e-07, "loss": 1.7002, "step": 7358 }, { "epoch": 2.5081799591002047, "grad_norm": 14.276615019224245, "learning_rate": 7.978307563278881e-07, "loss": 1.1421, "step": 7359 }, { "epoch": 2.5085207907293796, "grad_norm": 14.23944293588689, "learning_rate": 7.967564286701656e-07, "loss": 1.5142, "step": 7360 }, { "epoch": 2.508861622358555, "grad_norm": 13.996526179131441, "learning_rate": 7.956827621993885e-07, "loss": 1.2263, "step": 7361 }, { "epoch": 2.5092024539877302, "grad_norm": 14.233305213942131, "learning_rate": 7.946097570844491e-07, "loss": 1.8298, "step": 7362 }, { "epoch": 2.509543285616905, "grad_norm": 16.784348897198672, "learning_rate": 7.935374134941359e-07, "loss": 1.3478, "step": 7363 }, { "epoch": 2.5098841172460804, "grad_norm": 15.572479093873591, "learning_rate": 7.92465731597133e-07, "loss": 2.0343, "step": 7364 }, { "epoch": 2.5102249488752557, "grad_norm": 12.384354815029436, "learning_rate": 7.913947115620191e-07, "loss": 1.5259, "step": 7365 }, { "epoch": 2.5105657805044306, "grad_norm": 12.359896086592785, "learning_rate": 7.903243535572708e-07, "loss": 1.1324, "step": 7366 }, { "epoch": 2.510906612133606, "grad_norm": 15.344179204428357, "learning_rate": 7.892546577512589e-07, "loss": 1.515, "step": 7367 }, { "epoch": 2.511247443762781, "grad_norm": 13.647111740340657, "learning_rate": 7.881856243122521e-07, "loss": 1.363, "step": 7368 }, { "epoch": 2.5115882753919565, "grad_norm": 17.415695786671527, "learning_rate": 7.871172534084126e-07, "loss": 1.4744, "step": 7369 }, { "epoch": 2.511929107021132, "grad_norm": 15.35150354076101, "learning_rate": 7.860495452077993e-07, "loss": 1.5351, "step": 7370 }, { "epoch": 2.5122699386503067, "grad_norm": 15.78615190601747, "learning_rate": 7.84982499878369e-07, "loss": 1.2808, "step": 7371 }, { "epoch": 2.512610770279482, "grad_norm": 18.857883777610134, "learning_rate": 7.839161175879679e-07, "loss": 1.873, "step": 7372 }, { "epoch": 2.5129516019086573, "grad_norm": 12.960363357502438, "learning_rate": 7.828503985043462e-07, "loss": 1.1541, "step": 7373 }, { "epoch": 2.513292433537832, "grad_norm": 16.170782737568736, "learning_rate": 7.817853427951449e-07, "loss": 1.3803, "step": 7374 }, { "epoch": 2.5136332651670075, "grad_norm": 15.160987559636919, "learning_rate": 7.80720950627899e-07, "loss": 1.2531, "step": 7375 }, { "epoch": 2.5139740967961828, "grad_norm": 14.528107256723471, "learning_rate": 7.796572221700461e-07, "loss": 1.7039, "step": 7376 }, { "epoch": 2.5143149284253576, "grad_norm": 12.377666602027306, "learning_rate": 7.785941575889106e-07, "loss": 1.3853, "step": 7377 }, { "epoch": 2.514655760054533, "grad_norm": 16.618353786038536, "learning_rate": 7.775317570517177e-07, "loss": 1.7623, "step": 7378 }, { "epoch": 2.5149965916837083, "grad_norm": 23.430186925953976, "learning_rate": 7.764700207255904e-07, "loss": 1.8534, "step": 7379 }, { "epoch": 2.5153374233128836, "grad_norm": 17.915662250660127, "learning_rate": 7.754089487775406e-07, "loss": 1.2594, "step": 7380 }, { "epoch": 2.515678254942059, "grad_norm": 15.397664422691138, "learning_rate": 7.743485413744805e-07, "loss": 1.8082, "step": 7381 }, { "epoch": 2.5160190865712337, "grad_norm": 16.448836143967586, "learning_rate": 7.73288798683216e-07, "loss": 1.5979, "step": 7382 }, { "epoch": 2.516359918200409, "grad_norm": 11.23949734230631, "learning_rate": 7.722297208704488e-07, "loss": 1.1814, "step": 7383 }, { "epoch": 2.5167007498295844, "grad_norm": 15.010891263826833, "learning_rate": 7.711713081027767e-07, "loss": 1.2757, "step": 7384 }, { "epoch": 2.5170415814587592, "grad_norm": 14.074609454795562, "learning_rate": 7.701135605466914e-07, "loss": 1.261, "step": 7385 }, { "epoch": 2.5173824130879345, "grad_norm": 13.764414447460863, "learning_rate": 7.690564783685817e-07, "loss": 1.0638, "step": 7386 }, { "epoch": 2.51772324471711, "grad_norm": 16.022714723594095, "learning_rate": 7.680000617347299e-07, "loss": 1.2868, "step": 7387 }, { "epoch": 2.5180640763462847, "grad_norm": 11.799480559366899, "learning_rate": 7.669443108113151e-07, "loss": 1.6154, "step": 7388 }, { "epoch": 2.51840490797546, "grad_norm": 11.768644236199794, "learning_rate": 7.658892257644113e-07, "loss": 0.939, "step": 7389 }, { "epoch": 2.5187457396046353, "grad_norm": 11.465430660173231, "learning_rate": 7.64834806759987e-07, "loss": 1.4763, "step": 7390 }, { "epoch": 2.5190865712338106, "grad_norm": 10.422517501772361, "learning_rate": 7.637810539639067e-07, "loss": 1.3888, "step": 7391 }, { "epoch": 2.519427402862986, "grad_norm": 15.43662398509854, "learning_rate": 7.627279675419319e-07, "loss": 1.5599, "step": 7392 }, { "epoch": 2.519768234492161, "grad_norm": 12.820704781004274, "learning_rate": 7.616755476597137e-07, "loss": 1.0902, "step": 7393 }, { "epoch": 2.520109066121336, "grad_norm": 10.917417252559016, "learning_rate": 7.606237944828026e-07, "loss": 0.925, "step": 7394 }, { "epoch": 2.5204498977505114, "grad_norm": 14.347677486464546, "learning_rate": 7.595727081766468e-07, "loss": 1.2378, "step": 7395 }, { "epoch": 2.5207907293796863, "grad_norm": 13.640542659126242, "learning_rate": 7.585222889065829e-07, "loss": 0.7724, "step": 7396 }, { "epoch": 2.5211315610088616, "grad_norm": 13.187901524998502, "learning_rate": 7.574725368378477e-07, "loss": 1.2352, "step": 7397 }, { "epoch": 2.521472392638037, "grad_norm": 30.348865617751507, "learning_rate": 7.564234521355707e-07, "loss": 1.2088, "step": 7398 }, { "epoch": 2.5218132242672118, "grad_norm": 19.194658111405282, "learning_rate": 7.553750349647776e-07, "loss": 1.2974, "step": 7399 }, { "epoch": 2.522154055896387, "grad_norm": 16.331683618798603, "learning_rate": 7.543272854903882e-07, "loss": 2.1875, "step": 7400 }, { "epoch": 2.5224948875255624, "grad_norm": 11.124908017083714, "learning_rate": 7.53280203877218e-07, "loss": 1.1949, "step": 7401 }, { "epoch": 2.5228357191547377, "grad_norm": 14.127976827762394, "learning_rate": 7.522337902899768e-07, "loss": 1.6357, "step": 7402 }, { "epoch": 2.523176550783913, "grad_norm": 18.998008078900295, "learning_rate": 7.511880448932696e-07, "loss": 1.4481, "step": 7403 }, { "epoch": 2.523517382413088, "grad_norm": 13.49533295551422, "learning_rate": 7.50142967851597e-07, "loss": 1.4648, "step": 7404 }, { "epoch": 2.523858214042263, "grad_norm": 15.337134188455785, "learning_rate": 7.490985593293537e-07, "loss": 2.1124, "step": 7405 }, { "epoch": 2.5241990456714385, "grad_norm": 14.116031960572537, "learning_rate": 7.480548194908271e-07, "loss": 1.272, "step": 7406 }, { "epoch": 2.5245398773006134, "grad_norm": 9.900935165223613, "learning_rate": 7.470117485002044e-07, "loss": 1.1918, "step": 7407 }, { "epoch": 2.5248807089297887, "grad_norm": 14.37814763734437, "learning_rate": 7.459693465215651e-07, "loss": 1.4858, "step": 7408 }, { "epoch": 2.525221540558964, "grad_norm": 12.582151709465794, "learning_rate": 7.449276137188794e-07, "loss": 1.0836, "step": 7409 }, { "epoch": 2.525562372188139, "grad_norm": 16.335214110110382, "learning_rate": 7.438865502560211e-07, "loss": 2.134, "step": 7410 }, { "epoch": 2.525903203817314, "grad_norm": 26.137384615545418, "learning_rate": 7.428461562967499e-07, "loss": 1.6227, "step": 7411 }, { "epoch": 2.5262440354464895, "grad_norm": 14.979597600658169, "learning_rate": 7.418064320047236e-07, "loss": 1.2952, "step": 7412 }, { "epoch": 2.5265848670756648, "grad_norm": 15.814414130009325, "learning_rate": 7.407673775434987e-07, "loss": 1.715, "step": 7413 }, { "epoch": 2.52692569870484, "grad_norm": 32.81427698791769, "learning_rate": 7.397289930765195e-07, "loss": 1.718, "step": 7414 }, { "epoch": 2.527266530334015, "grad_norm": 13.842699621292997, "learning_rate": 7.386912787671286e-07, "loss": 1.7704, "step": 7415 }, { "epoch": 2.5276073619631902, "grad_norm": 15.666791842269564, "learning_rate": 7.376542347785631e-07, "loss": 1.3374, "step": 7416 }, { "epoch": 2.5279481935923656, "grad_norm": 12.902460042865902, "learning_rate": 7.366178612739538e-07, "loss": 1.0773, "step": 7417 }, { "epoch": 2.5282890252215404, "grad_norm": 12.424832870006833, "learning_rate": 7.355821584163264e-07, "loss": 1.3416, "step": 7418 }, { "epoch": 2.5286298568507157, "grad_norm": 13.691257950976492, "learning_rate": 7.345471263686011e-07, "loss": 0.901, "step": 7419 }, { "epoch": 2.528970688479891, "grad_norm": 14.745461203725633, "learning_rate": 7.335127652935925e-07, "loss": 1.3336, "step": 7420 }, { "epoch": 2.529311520109066, "grad_norm": 17.01075597041676, "learning_rate": 7.324790753540101e-07, "loss": 1.6479, "step": 7421 }, { "epoch": 2.529652351738241, "grad_norm": 22.0880869909676, "learning_rate": 7.314460567124571e-07, "loss": 1.3919, "step": 7422 }, { "epoch": 2.5299931833674165, "grad_norm": 14.840927920129124, "learning_rate": 7.304137095314312e-07, "loss": 1.9994, "step": 7423 }, { "epoch": 2.530334014996592, "grad_norm": 12.385464311332294, "learning_rate": 7.293820339733265e-07, "loss": 1.6584, "step": 7424 }, { "epoch": 2.530674846625767, "grad_norm": 14.182593846513335, "learning_rate": 7.28351030200426e-07, "loss": 1.5844, "step": 7425 }, { "epoch": 2.531015678254942, "grad_norm": 13.54925437503414, "learning_rate": 7.273206983749148e-07, "loss": 1.129, "step": 7426 }, { "epoch": 2.5313565098841173, "grad_norm": 13.618172048926102, "learning_rate": 7.262910386588657e-07, "loss": 1.3588, "step": 7427 }, { "epoch": 2.5316973415132926, "grad_norm": 15.975769236053301, "learning_rate": 7.252620512142472e-07, "loss": 1.8363, "step": 7428 }, { "epoch": 2.5320381731424675, "grad_norm": 24.210289699663964, "learning_rate": 7.242337362029273e-07, "loss": 0.6429, "step": 7429 }, { "epoch": 2.532379004771643, "grad_norm": 21.201744509798647, "learning_rate": 7.232060937866602e-07, "loss": 1.6521, "step": 7430 }, { "epoch": 2.532719836400818, "grad_norm": 17.0037989097734, "learning_rate": 7.221791241270998e-07, "loss": 1.5373, "step": 7431 }, { "epoch": 2.533060668029993, "grad_norm": 11.681451731553782, "learning_rate": 7.21152827385792e-07, "loss": 0.9405, "step": 7432 }, { "epoch": 2.5334014996591683, "grad_norm": 22.021592518746917, "learning_rate": 7.20127203724178e-07, "loss": 0.8153, "step": 7433 }, { "epoch": 2.5337423312883436, "grad_norm": 12.896057103080716, "learning_rate": 7.191022533035913e-07, "loss": 1.6441, "step": 7434 }, { "epoch": 2.534083162917519, "grad_norm": 18.524799128037106, "learning_rate": 7.180779762852618e-07, "loss": 2.0216, "step": 7435 }, { "epoch": 2.534423994546694, "grad_norm": 16.2142834908373, "learning_rate": 7.170543728303119e-07, "loss": 1.2325, "step": 7436 }, { "epoch": 2.534764826175869, "grad_norm": 15.470132078984223, "learning_rate": 7.160314430997589e-07, "loss": 1.4961, "step": 7437 }, { "epoch": 2.5351056578050444, "grad_norm": 15.859849735897038, "learning_rate": 7.150091872545129e-07, "loss": 1.6399, "step": 7438 }, { "epoch": 2.5354464894342197, "grad_norm": 23.29977911765609, "learning_rate": 7.139876054553807e-07, "loss": 1.8268, "step": 7439 }, { "epoch": 2.5357873210633946, "grad_norm": 15.585284149163423, "learning_rate": 7.129666978630579e-07, "loss": 1.5916, "step": 7440 }, { "epoch": 2.53612815269257, "grad_norm": 16.425190801716614, "learning_rate": 7.119464646381397e-07, "loss": 1.2795, "step": 7441 }, { "epoch": 2.536468984321745, "grad_norm": 23.377126824157838, "learning_rate": 7.109269059411139e-07, "loss": 1.3624, "step": 7442 }, { "epoch": 2.53680981595092, "grad_norm": 14.67346501935475, "learning_rate": 7.099080219323567e-07, "loss": 1.8565, "step": 7443 }, { "epoch": 2.5371506475800953, "grad_norm": 14.010663884799238, "learning_rate": 7.088898127721489e-07, "loss": 0.7907, "step": 7444 }, { "epoch": 2.5374914792092707, "grad_norm": 16.79252131885413, "learning_rate": 7.078722786206532e-07, "loss": 1.2841, "step": 7445 }, { "epoch": 2.537832310838446, "grad_norm": 15.453622519245345, "learning_rate": 7.068554196379335e-07, "loss": 1.8678, "step": 7446 }, { "epoch": 2.5381731424676213, "grad_norm": 13.690597144248576, "learning_rate": 7.058392359839483e-07, "loss": 1.574, "step": 7447 }, { "epoch": 2.538513974096796, "grad_norm": 11.088629490097476, "learning_rate": 7.048237278185449e-07, "loss": 0.8778, "step": 7448 }, { "epoch": 2.5388548057259714, "grad_norm": 40.03221779026003, "learning_rate": 7.038088953014666e-07, "loss": 1.79, "step": 7449 }, { "epoch": 2.5391956373551468, "grad_norm": 10.48398376501175, "learning_rate": 7.027947385923511e-07, "loss": 0.938, "step": 7450 }, { "epoch": 2.5395364689843216, "grad_norm": 15.536694302111991, "learning_rate": 7.017812578507299e-07, "loss": 1.3474, "step": 7451 }, { "epoch": 2.539877300613497, "grad_norm": 16.2709075221783, "learning_rate": 7.007684532360265e-07, "loss": 1.5791, "step": 7452 }, { "epoch": 2.5402181322426722, "grad_norm": 15.60145672252382, "learning_rate": 6.9975632490756e-07, "loss": 1.194, "step": 7453 }, { "epoch": 2.540558963871847, "grad_norm": 12.002147655828264, "learning_rate": 6.987448730245422e-07, "loss": 1.3491, "step": 7454 }, { "epoch": 2.5408997955010224, "grad_norm": 9.868236115850912, "learning_rate": 6.977340977460778e-07, "loss": 1.0245, "step": 7455 }, { "epoch": 2.5412406271301977, "grad_norm": 17.341225885734325, "learning_rate": 6.967239992311658e-07, "loss": 1.748, "step": 7456 }, { "epoch": 2.541581458759373, "grad_norm": 15.842698171608319, "learning_rate": 6.957145776386998e-07, "loss": 1.3395, "step": 7457 }, { "epoch": 2.5419222903885483, "grad_norm": 15.35613491589239, "learning_rate": 6.947058331274658e-07, "loss": 1.5127, "step": 7458 }, { "epoch": 2.542263122017723, "grad_norm": 16.994190976461557, "learning_rate": 6.936977658561405e-07, "loss": 1.2297, "step": 7459 }, { "epoch": 2.5426039536468985, "grad_norm": 19.367637297241817, "learning_rate": 6.926903759833009e-07, "loss": 2.0924, "step": 7460 }, { "epoch": 2.542944785276074, "grad_norm": 10.610100308042385, "learning_rate": 6.916836636674113e-07, "loss": 1.1283, "step": 7461 }, { "epoch": 2.5432856169052487, "grad_norm": 17.881806431530663, "learning_rate": 6.906776290668299e-07, "loss": 1.2584, "step": 7462 }, { "epoch": 2.543626448534424, "grad_norm": 16.439164700097205, "learning_rate": 6.896722723398142e-07, "loss": 1.1934, "step": 7463 }, { "epoch": 2.5439672801635993, "grad_norm": 11.833445767299883, "learning_rate": 6.886675936445075e-07, "loss": 1.4468, "step": 7464 }, { "epoch": 2.544308111792774, "grad_norm": 10.311150731931095, "learning_rate": 6.876635931389508e-07, "loss": 0.7819, "step": 7465 }, { "epoch": 2.5446489434219495, "grad_norm": 13.69926120122674, "learning_rate": 6.866602709810777e-07, "loss": 1.1414, "step": 7466 }, { "epoch": 2.544989775051125, "grad_norm": 15.790945826421298, "learning_rate": 6.856576273287136e-07, "loss": 1.5101, "step": 7467 }, { "epoch": 2.5453306066803, "grad_norm": 13.357162892711615, "learning_rate": 6.846556623395795e-07, "loss": 1.3342, "step": 7468 }, { "epoch": 2.545671438309475, "grad_norm": 9.999257473381297, "learning_rate": 6.836543761712883e-07, "loss": 1.0993, "step": 7469 }, { "epoch": 2.5460122699386503, "grad_norm": 15.456092316377015, "learning_rate": 6.826537689813462e-07, "loss": 1.8562, "step": 7470 }, { "epoch": 2.5463531015678256, "grad_norm": 13.350889786251932, "learning_rate": 6.816538409271522e-07, "loss": 1.3233, "step": 7471 }, { "epoch": 2.546693933197001, "grad_norm": 16.00767677219847, "learning_rate": 6.806545921659996e-07, "loss": 0.9706, "step": 7472 }, { "epoch": 2.5470347648261757, "grad_norm": 12.916934163471954, "learning_rate": 6.796560228550747e-07, "loss": 1.3898, "step": 7473 }, { "epoch": 2.547375596455351, "grad_norm": 23.521405366526686, "learning_rate": 6.786581331514535e-07, "loss": 1.7989, "step": 7474 }, { "epoch": 2.5477164280845264, "grad_norm": 11.309291526506726, "learning_rate": 6.776609232121118e-07, "loss": 1.2695, "step": 7475 }, { "epoch": 2.5480572597137012, "grad_norm": 21.53296969970772, "learning_rate": 6.766643931939143e-07, "loss": 1.2414, "step": 7476 }, { "epoch": 2.5483980913428765, "grad_norm": 19.91683270165153, "learning_rate": 6.756685432536153e-07, "loss": 1.3956, "step": 7477 }, { "epoch": 2.548738922972052, "grad_norm": 12.69360505543974, "learning_rate": 6.746733735478711e-07, "loss": 1.4268, "step": 7478 }, { "epoch": 2.549079754601227, "grad_norm": 19.634314026974394, "learning_rate": 6.73678884233222e-07, "loss": 1.353, "step": 7479 }, { "epoch": 2.549420586230402, "grad_norm": 17.358765437518457, "learning_rate": 6.726850754661057e-07, "loss": 1.7538, "step": 7480 }, { "epoch": 2.5497614178595773, "grad_norm": 26.144711291586887, "learning_rate": 6.716919474028549e-07, "loss": 1.1928, "step": 7481 }, { "epoch": 2.5501022494887526, "grad_norm": 24.522609041085005, "learning_rate": 6.706995001996897e-07, "loss": 1.266, "step": 7482 }, { "epoch": 2.550443081117928, "grad_norm": 11.39086458431068, "learning_rate": 6.697077340127262e-07, "loss": 1.5944, "step": 7483 }, { "epoch": 2.550783912747103, "grad_norm": 23.077119385971162, "learning_rate": 6.687166489979763e-07, "loss": 1.281, "step": 7484 }, { "epoch": 2.551124744376278, "grad_norm": 17.94143664758822, "learning_rate": 6.677262453113381e-07, "loss": 1.0701, "step": 7485 }, { "epoch": 2.5514655760054534, "grad_norm": 19.24041315783454, "learning_rate": 6.667365231086076e-07, "loss": 1.6118, "step": 7486 }, { "epoch": 2.5518064076346283, "grad_norm": 14.80668599994533, "learning_rate": 6.657474825454712e-07, "loss": 1.1814, "step": 7487 }, { "epoch": 2.5521472392638036, "grad_norm": 16.992535209583476, "learning_rate": 6.647591237775098e-07, "loss": 1.4056, "step": 7488 }, { "epoch": 2.552488070892979, "grad_norm": 22.3806552825105, "learning_rate": 6.637714469601969e-07, "loss": 2.0174, "step": 7489 }, { "epoch": 2.552828902522154, "grad_norm": 14.698556921374086, "learning_rate": 6.627844522488947e-07, "loss": 1.4303, "step": 7490 }, { "epoch": 2.553169734151329, "grad_norm": 20.35596216705866, "learning_rate": 6.617981397988649e-07, "loss": 1.013, "step": 7491 }, { "epoch": 2.5535105657805044, "grad_norm": 12.385620679160192, "learning_rate": 6.608125097652579e-07, "loss": 1.1662, "step": 7492 }, { "epoch": 2.5538513974096797, "grad_norm": 15.436551592414942, "learning_rate": 6.598275623031142e-07, "loss": 1.5772, "step": 7493 }, { "epoch": 2.554192229038855, "grad_norm": 14.34127917401642, "learning_rate": 6.588432975673742e-07, "loss": 1.2449, "step": 7494 }, { "epoch": 2.55453306066803, "grad_norm": 15.642030454053655, "learning_rate": 6.578597157128642e-07, "loss": 1.6719, "step": 7495 }, { "epoch": 2.554873892297205, "grad_norm": 8.782445128229245, "learning_rate": 6.56876816894304e-07, "loss": 1.1693, "step": 7496 }, { "epoch": 2.5552147239263805, "grad_norm": 18.058506836493294, "learning_rate": 6.558946012663125e-07, "loss": 1.2447, "step": 7497 }, { "epoch": 2.5555555555555554, "grad_norm": 14.340363798815176, "learning_rate": 6.549130689833915e-07, "loss": 1.7319, "step": 7498 }, { "epoch": 2.5558963871847307, "grad_norm": 13.727178991245914, "learning_rate": 6.539322201999421e-07, "loss": 1.6583, "step": 7499 }, { "epoch": 2.556237218813906, "grad_norm": 13.534538491793398, "learning_rate": 6.529520550702545e-07, "loss": 1.7296, "step": 7500 }, { "epoch": 2.556578050443081, "grad_norm": 18.519079930487678, "learning_rate": 6.519725737485139e-07, "loss": 1.4234, "step": 7501 }, { "epoch": 2.556918882072256, "grad_norm": 11.827437430681869, "learning_rate": 6.509937763887953e-07, "loss": 0.9797, "step": 7502 }, { "epoch": 2.5572597137014315, "grad_norm": 15.94786915910308, "learning_rate": 6.500156631450682e-07, "loss": 1.6703, "step": 7503 }, { "epoch": 2.5576005453306068, "grad_norm": 15.871260150276552, "learning_rate": 6.490382341711937e-07, "loss": 1.7732, "step": 7504 }, { "epoch": 2.557941376959782, "grad_norm": 15.146228473401834, "learning_rate": 6.480614896209253e-07, "loss": 1.8148, "step": 7505 }, { "epoch": 2.558282208588957, "grad_norm": 34.5192280080981, "learning_rate": 6.470854296479084e-07, "loss": 1.7932, "step": 7506 }, { "epoch": 2.5586230402181322, "grad_norm": 8.937878576555123, "learning_rate": 6.461100544056814e-07, "loss": 1.0607, "step": 7507 }, { "epoch": 2.5589638718473076, "grad_norm": 16.755890482089264, "learning_rate": 6.451353640476749e-07, "loss": 1.6062, "step": 7508 }, { "epoch": 2.5593047034764824, "grad_norm": 14.683883607711413, "learning_rate": 6.44161358727211e-07, "loss": 1.4719, "step": 7509 }, { "epoch": 2.5596455351056577, "grad_norm": 13.72906187230467, "learning_rate": 6.431880385975064e-07, "loss": 1.1821, "step": 7510 }, { "epoch": 2.559986366734833, "grad_norm": 14.657126653716805, "learning_rate": 6.422154038116646e-07, "loss": 1.5824, "step": 7511 }, { "epoch": 2.560327198364008, "grad_norm": 10.756076837967816, "learning_rate": 6.412434545226876e-07, "loss": 1.51, "step": 7512 }, { "epoch": 2.560668029993183, "grad_norm": 14.490943693257083, "learning_rate": 6.40272190883468e-07, "loss": 1.2049, "step": 7513 }, { "epoch": 2.5610088616223585, "grad_norm": 18.056694077025742, "learning_rate": 6.393016130467855e-07, "loss": 1.7334, "step": 7514 }, { "epoch": 2.561349693251534, "grad_norm": 18.468549011510394, "learning_rate": 6.383317211653206e-07, "loss": 1.7978, "step": 7515 }, { "epoch": 2.561690524880709, "grad_norm": 13.802332533766752, "learning_rate": 6.37362515391638e-07, "loss": 1.2166, "step": 7516 }, { "epoch": 2.562031356509884, "grad_norm": 13.975517711085459, "learning_rate": 6.363939958781968e-07, "loss": 1.5202, "step": 7517 }, { "epoch": 2.5623721881390593, "grad_norm": 28.932858433824084, "learning_rate": 6.354261627773528e-07, "loss": 1.3765, "step": 7518 }, { "epoch": 2.5627130197682346, "grad_norm": 17.50363424532597, "learning_rate": 6.344590162413467e-07, "loss": 1.363, "step": 7519 }, { "epoch": 2.5630538513974095, "grad_norm": 16.243152362631704, "learning_rate": 6.334925564223154e-07, "loss": 0.8087, "step": 7520 }, { "epoch": 2.563394683026585, "grad_norm": 13.676637689773194, "learning_rate": 6.32526783472287e-07, "loss": 1.1545, "step": 7521 }, { "epoch": 2.56373551465576, "grad_norm": 18.626554144425764, "learning_rate": 6.315616975431816e-07, "loss": 1.6043, "step": 7522 }, { "epoch": 2.564076346284935, "grad_norm": 14.68966546310584, "learning_rate": 6.30597298786812e-07, "loss": 1.3608, "step": 7523 }, { "epoch": 2.5644171779141103, "grad_norm": 21.410316737982576, "learning_rate": 6.296335873548787e-07, "loss": 1.4842, "step": 7524 }, { "epoch": 2.5647580095432856, "grad_norm": 16.585501366665145, "learning_rate": 6.28670563398981e-07, "loss": 0.7889, "step": 7525 }, { "epoch": 2.565098841172461, "grad_norm": 15.756272525125036, "learning_rate": 6.277082270706053e-07, "loss": 1.3538, "step": 7526 }, { "epoch": 2.565439672801636, "grad_norm": 17.386642965041144, "learning_rate": 6.267465785211291e-07, "loss": 1.3714, "step": 7527 }, { "epoch": 2.565780504430811, "grad_norm": 15.174932697990073, "learning_rate": 6.257856179018268e-07, "loss": 1.8268, "step": 7528 }, { "epoch": 2.5661213360599864, "grad_norm": 12.935610519893192, "learning_rate": 6.248253453638586e-07, "loss": 1.0392, "step": 7529 }, { "epoch": 2.5664621676891617, "grad_norm": 15.93124030674824, "learning_rate": 6.238657610582788e-07, "loss": 1.3081, "step": 7530 }, { "epoch": 2.5668029993183366, "grad_norm": 12.40525804926018, "learning_rate": 6.229068651360376e-07, "loss": 1.1104, "step": 7531 }, { "epoch": 2.567143830947512, "grad_norm": 12.795507694260392, "learning_rate": 6.219486577479694e-07, "loss": 1.2855, "step": 7532 }, { "epoch": 2.567484662576687, "grad_norm": 10.745389066872951, "learning_rate": 6.209911390448053e-07, "loss": 1.1567, "step": 7533 }, { "epoch": 2.567825494205862, "grad_norm": 14.301298562606224, "learning_rate": 6.200343091771671e-07, "loss": 1.2952, "step": 7534 }, { "epoch": 2.5681663258350373, "grad_norm": 14.34397698997906, "learning_rate": 6.190781682955676e-07, "loss": 1.2093, "step": 7535 }, { "epoch": 2.5685071574642127, "grad_norm": 12.579256822064343, "learning_rate": 6.181227165504123e-07, "loss": 1.3373, "step": 7536 }, { "epoch": 2.568847989093388, "grad_norm": 24.252253435035048, "learning_rate": 6.171679540919961e-07, "loss": 1.2051, "step": 7537 }, { "epoch": 2.5691888207225633, "grad_norm": 13.089668003072703, "learning_rate": 6.162138810705082e-07, "loss": 1.7413, "step": 7538 }, { "epoch": 2.569529652351738, "grad_norm": 21.068002866637766, "learning_rate": 6.152604976360271e-07, "loss": 1.1302, "step": 7539 }, { "epoch": 2.5698704839809134, "grad_norm": 19.496122597180182, "learning_rate": 6.143078039385247e-07, "loss": 1.7931, "step": 7540 }, { "epoch": 2.5702113156100888, "grad_norm": 13.628838270755262, "learning_rate": 6.133558001278633e-07, "loss": 1.0128, "step": 7541 }, { "epoch": 2.5705521472392636, "grad_norm": 19.55965554832037, "learning_rate": 6.12404486353797e-07, "loss": 1.6733, "step": 7542 }, { "epoch": 2.570892978868439, "grad_norm": 16.170665940253034, "learning_rate": 6.114538627659705e-07, "loss": 1.3652, "step": 7543 }, { "epoch": 2.5712338104976142, "grad_norm": 24.859163801150487, "learning_rate": 6.105039295139226e-07, "loss": 1.34, "step": 7544 }, { "epoch": 2.571574642126789, "grad_norm": 13.570133449185306, "learning_rate": 6.095546867470776e-07, "loss": 1.4805, "step": 7545 }, { "epoch": 2.5719154737559644, "grad_norm": 26.230587614994175, "learning_rate": 6.086061346147593e-07, "loss": 1.3337, "step": 7546 }, { "epoch": 2.5722563053851397, "grad_norm": 15.032702086385363, "learning_rate": 6.076582732661778e-07, "loss": 1.2779, "step": 7547 }, { "epoch": 2.572597137014315, "grad_norm": 19.000299418117, "learning_rate": 6.067111028504324e-07, "loss": 1.7698, "step": 7548 }, { "epoch": 2.5729379686434903, "grad_norm": 11.023054838700272, "learning_rate": 6.05764623516522e-07, "loss": 1.162, "step": 7549 }, { "epoch": 2.573278800272665, "grad_norm": 11.372707914925734, "learning_rate": 6.048188354133272e-07, "loss": 1.2493, "step": 7550 }, { "epoch": 2.5736196319018405, "grad_norm": 19.74772530419478, "learning_rate": 6.038737386896254e-07, "loss": 1.4789, "step": 7551 }, { "epoch": 2.573960463531016, "grad_norm": 18.413126976107833, "learning_rate": 6.029293334940862e-07, "loss": 1.2351, "step": 7552 }, { "epoch": 2.5743012951601907, "grad_norm": 11.407322138984718, "learning_rate": 6.019856199752655e-07, "loss": 1.1112, "step": 7553 }, { "epoch": 2.574642126789366, "grad_norm": 13.522349350624626, "learning_rate": 6.010425982816143e-07, "loss": 0.7853, "step": 7554 }, { "epoch": 2.5749829584185413, "grad_norm": 19.00770798126287, "learning_rate": 6.001002685614732e-07, "loss": 1.8336, "step": 7555 }, { "epoch": 2.575323790047716, "grad_norm": 21.289596753311248, "learning_rate": 5.991586309630754e-07, "loss": 0.8119, "step": 7556 }, { "epoch": 2.5756646216768915, "grad_norm": 14.838626665225064, "learning_rate": 5.982176856345445e-07, "loss": 1.5879, "step": 7557 }, { "epoch": 2.576005453306067, "grad_norm": 12.274013150311129, "learning_rate": 5.972774327238923e-07, "loss": 1.5406, "step": 7558 }, { "epoch": 2.576346284935242, "grad_norm": 24.81919635482738, "learning_rate": 5.96337872379027e-07, "loss": 1.49, "step": 7559 }, { "epoch": 2.5766871165644174, "grad_norm": 18.391527502845687, "learning_rate": 5.953990047477454e-07, "loss": 1.3091, "step": 7560 }, { "epoch": 2.5770279481935923, "grad_norm": 12.64798912522365, "learning_rate": 5.944608299777316e-07, "loss": 1.0762, "step": 7561 }, { "epoch": 2.5773687798227676, "grad_norm": 18.649494734354118, "learning_rate": 5.935233482165692e-07, "loss": 1.446, "step": 7562 }, { "epoch": 2.577709611451943, "grad_norm": 17.608749283860497, "learning_rate": 5.925865596117242e-07, "loss": 1.6205, "step": 7563 }, { "epoch": 2.5780504430811177, "grad_norm": 14.482754492052926, "learning_rate": 5.91650464310557e-07, "loss": 1.786, "step": 7564 }, { "epoch": 2.578391274710293, "grad_norm": 14.44253574409249, "learning_rate": 5.907150624603225e-07, "loss": 1.581, "step": 7565 }, { "epoch": 2.5787321063394684, "grad_norm": 15.332856588864901, "learning_rate": 5.897803542081604e-07, "loss": 1.8783, "step": 7566 }, { "epoch": 2.5790729379686432, "grad_norm": 24.083653045672996, "learning_rate": 5.888463397011046e-07, "loss": 1.2323, "step": 7567 }, { "epoch": 2.5794137695978185, "grad_norm": 18.079461208672765, "learning_rate": 5.879130190860788e-07, "loss": 1.4314, "step": 7568 }, { "epoch": 2.579754601226994, "grad_norm": 18.250169969097662, "learning_rate": 5.869803925098994e-07, "loss": 1.0131, "step": 7569 }, { "epoch": 2.580095432856169, "grad_norm": 12.78784255549784, "learning_rate": 5.860484601192706e-07, "loss": 1.0866, "step": 7570 }, { "epoch": 2.5804362644853445, "grad_norm": 15.016282839827316, "learning_rate": 5.851172220607903e-07, "loss": 1.2379, "step": 7571 }, { "epoch": 2.5807770961145193, "grad_norm": 11.108655055531205, "learning_rate": 5.841866784809457e-07, "loss": 1.1546, "step": 7572 }, { "epoch": 2.5811179277436946, "grad_norm": 15.480034018147299, "learning_rate": 5.832568295261148e-07, "loss": 1.5815, "step": 7573 }, { "epoch": 2.58145875937287, "grad_norm": 10.10583016966508, "learning_rate": 5.823276753425672e-07, "loss": 1.1795, "step": 7574 }, { "epoch": 2.581799591002045, "grad_norm": 23.109608434869788, "learning_rate": 5.813992160764614e-07, "loss": 1.1411, "step": 7575 }, { "epoch": 2.58214042263122, "grad_norm": 14.274630908388552, "learning_rate": 5.804714518738486e-07, "loss": 1.3388, "step": 7576 }, { "epoch": 2.5824812542603954, "grad_norm": 12.34232514961632, "learning_rate": 5.79544382880669e-07, "loss": 1.5614, "step": 7577 }, { "epoch": 2.5828220858895703, "grad_norm": 13.85441329552113, "learning_rate": 5.786180092427562e-07, "loss": 1.4195, "step": 7578 }, { "epoch": 2.5831629175187456, "grad_norm": 14.712760473473, "learning_rate": 5.776923311058292e-07, "loss": 1.2241, "step": 7579 }, { "epoch": 2.583503749147921, "grad_norm": 14.84973221142647, "learning_rate": 5.767673486155034e-07, "loss": 2.0231, "step": 7580 }, { "epoch": 2.5838445807770962, "grad_norm": 12.813271959484416, "learning_rate": 5.758430619172828e-07, "loss": 1.0574, "step": 7581 }, { "epoch": 2.5841854124062715, "grad_norm": 15.873306573370773, "learning_rate": 5.749194711565581e-07, "loss": 1.719, "step": 7582 }, { "epoch": 2.5845262440354464, "grad_norm": 17.77459886164835, "learning_rate": 5.739965764786177e-07, "loss": 1.8418, "step": 7583 }, { "epoch": 2.5848670756646217, "grad_norm": 15.26556484009428, "learning_rate": 5.73074378028634e-07, "loss": 1.2007, "step": 7584 }, { "epoch": 2.585207907293797, "grad_norm": 23.560816372382835, "learning_rate": 5.721528759516721e-07, "loss": 0.9166, "step": 7585 }, { "epoch": 2.585548738922972, "grad_norm": 16.23121556706933, "learning_rate": 5.712320703926905e-07, "loss": 1.4422, "step": 7586 }, { "epoch": 2.585889570552147, "grad_norm": 16.78378858772311, "learning_rate": 5.703119614965335e-07, "loss": 1.4507, "step": 7587 }, { "epoch": 2.5862304021813225, "grad_norm": 13.21942248440853, "learning_rate": 5.693925494079389e-07, "loss": 1.529, "step": 7588 }, { "epoch": 2.5865712338104974, "grad_norm": 15.924732289009594, "learning_rate": 5.684738342715329e-07, "loss": 1.0063, "step": 7589 }, { "epoch": 2.5869120654396727, "grad_norm": 12.923813563974988, "learning_rate": 5.67555816231834e-07, "loss": 1.2073, "step": 7590 }, { "epoch": 2.587252897068848, "grad_norm": 11.880310242561448, "learning_rate": 5.666384954332505e-07, "loss": 1.4026, "step": 7591 }, { "epoch": 2.5875937286980233, "grad_norm": 17.610140269477185, "learning_rate": 5.65721872020078e-07, "loss": 1.3519, "step": 7592 }, { "epoch": 2.5879345603271986, "grad_norm": 14.023329970917652, "learning_rate": 5.64805946136508e-07, "loss": 1.616, "step": 7593 }, { "epoch": 2.5882753919563735, "grad_norm": 14.68695762808496, "learning_rate": 5.63890717926619e-07, "loss": 1.1191, "step": 7594 }, { "epoch": 2.5886162235855488, "grad_norm": 10.044929801869007, "learning_rate": 5.629761875343776e-07, "loss": 0.9482, "step": 7595 }, { "epoch": 2.588957055214724, "grad_norm": 14.585230726878313, "learning_rate": 5.620623551036453e-07, "loss": 1.7916, "step": 7596 }, { "epoch": 2.589297886843899, "grad_norm": 21.632658396129237, "learning_rate": 5.611492207781715e-07, "loss": 1.2568, "step": 7597 }, { "epoch": 2.5896387184730743, "grad_norm": 25.547891239589216, "learning_rate": 5.602367847015938e-07, "loss": 1.3806, "step": 7598 }, { "epoch": 2.5899795501022496, "grad_norm": 15.360920041375469, "learning_rate": 5.59325047017446e-07, "loss": 1.2547, "step": 7599 }, { "epoch": 2.5903203817314244, "grad_norm": 16.886976862212187, "learning_rate": 5.584140078691441e-07, "loss": 1.084, "step": 7600 }, { "epoch": 2.5906612133605997, "grad_norm": 31.08548770146703, "learning_rate": 5.575036673999984e-07, "loss": 1.4638, "step": 7601 }, { "epoch": 2.591002044989775, "grad_norm": 12.376511342749444, "learning_rate": 5.565940257532132e-07, "loss": 1.09, "step": 7602 }, { "epoch": 2.5913428766189504, "grad_norm": 12.099344339808326, "learning_rate": 5.556850830718741e-07, "loss": 1.1049, "step": 7603 }, { "epoch": 2.5916837082481257, "grad_norm": 10.665697961255859, "learning_rate": 5.547768394989638e-07, "loss": 1.3053, "step": 7604 }, { "epoch": 2.5920245398773005, "grad_norm": 12.657019809649363, "learning_rate": 5.538692951773517e-07, "loss": 1.1978, "step": 7605 }, { "epoch": 2.592365371506476, "grad_norm": 18.163908797838573, "learning_rate": 5.529624502497988e-07, "loss": 1.6117, "step": 7606 }, { "epoch": 2.592706203135651, "grad_norm": 14.95999624579253, "learning_rate": 5.520563048589545e-07, "loss": 1.3498, "step": 7607 }, { "epoch": 2.593047034764826, "grad_norm": 20.506759943613577, "learning_rate": 5.511508591473602e-07, "loss": 1.1816, "step": 7608 }, { "epoch": 2.5933878663940013, "grad_norm": 15.223233374474844, "learning_rate": 5.502461132574455e-07, "loss": 1.3085, "step": 7609 }, { "epoch": 2.5937286980231766, "grad_norm": 13.969090668177662, "learning_rate": 5.493420673315308e-07, "loss": 0.9485, "step": 7610 }, { "epoch": 2.5940695296523515, "grad_norm": 9.883939125038136, "learning_rate": 5.484387215118259e-07, "loss": 1.0597, "step": 7611 }, { "epoch": 2.594410361281527, "grad_norm": 12.758281531446567, "learning_rate": 5.475360759404313e-07, "loss": 1.0226, "step": 7612 }, { "epoch": 2.594751192910702, "grad_norm": 19.21004808077746, "learning_rate": 5.466341307593348e-07, "loss": 1.7968, "step": 7613 }, { "epoch": 2.5950920245398774, "grad_norm": 14.862162960826184, "learning_rate": 5.457328861104177e-07, "loss": 1.6059, "step": 7614 }, { "epoch": 2.5954328561690527, "grad_norm": 19.632711903054332, "learning_rate": 5.448323421354507e-07, "loss": 1.963, "step": 7615 }, { "epoch": 2.5957736877982276, "grad_norm": 13.2227837257219, "learning_rate": 5.439324989760891e-07, "loss": 1.1763, "step": 7616 }, { "epoch": 2.596114519427403, "grad_norm": 16.278360233951233, "learning_rate": 5.430333567738854e-07, "loss": 1.5994, "step": 7617 }, { "epoch": 2.596455351056578, "grad_norm": 17.91166564730727, "learning_rate": 5.421349156702765e-07, "loss": 1.1361, "step": 7618 }, { "epoch": 2.596796182685753, "grad_norm": 20.717650706314274, "learning_rate": 5.41237175806591e-07, "loss": 1.2107, "step": 7619 }, { "epoch": 2.5971370143149284, "grad_norm": 12.3617290221532, "learning_rate": 5.403401373240469e-07, "loss": 1.7145, "step": 7620 }, { "epoch": 2.5974778459441037, "grad_norm": 11.417697086418292, "learning_rate": 5.39443800363752e-07, "loss": 1.0997, "step": 7621 }, { "epoch": 2.5978186775732786, "grad_norm": 13.692385585454653, "learning_rate": 5.385481650667041e-07, "loss": 1.5414, "step": 7622 }, { "epoch": 2.598159509202454, "grad_norm": 13.265281185028426, "learning_rate": 5.376532315737898e-07, "loss": 1.3023, "step": 7623 }, { "epoch": 2.598500340831629, "grad_norm": 14.708475582283823, "learning_rate": 5.367590000257855e-07, "loss": 1.7442, "step": 7624 }, { "epoch": 2.5988411724608045, "grad_norm": 12.855259601572147, "learning_rate": 5.358654705633576e-07, "loss": 1.3278, "step": 7625 }, { "epoch": 2.59918200408998, "grad_norm": 13.344797115151028, "learning_rate": 5.349726433270619e-07, "loss": 1.3636, "step": 7626 }, { "epoch": 2.5995228357191547, "grad_norm": 13.081721869459685, "learning_rate": 5.340805184573438e-07, "loss": 1.5649, "step": 7627 }, { "epoch": 2.59986366734833, "grad_norm": 16.298469232670715, "learning_rate": 5.33189096094539e-07, "loss": 1.2182, "step": 7628 }, { "epoch": 2.6002044989775053, "grad_norm": 13.839660768432886, "learning_rate": 5.322983763788686e-07, "loss": 1.5229, "step": 7629 }, { "epoch": 2.60054533060668, "grad_norm": 18.870286932878596, "learning_rate": 5.314083594504493e-07, "loss": 1.2202, "step": 7630 }, { "epoch": 2.6008861622358554, "grad_norm": 17.040723310454997, "learning_rate": 5.305190454492848e-07, "loss": 1.5158, "step": 7631 }, { "epoch": 2.6012269938650308, "grad_norm": 17.869614736498, "learning_rate": 5.296304345152642e-07, "loss": 1.6303, "step": 7632 }, { "epoch": 2.6015678254942056, "grad_norm": 13.229862487674161, "learning_rate": 5.28742526788174e-07, "loss": 0.6866, "step": 7633 }, { "epoch": 2.601908657123381, "grad_norm": 10.334670869950893, "learning_rate": 5.278553224076821e-07, "loss": 1.0687, "step": 7634 }, { "epoch": 2.6022494887525562, "grad_norm": 13.435897346647335, "learning_rate": 5.2696882151335e-07, "loss": 1.4004, "step": 7635 }, { "epoch": 2.6025903203817315, "grad_norm": 9.700081488956696, "learning_rate": 5.260830242446302e-07, "loss": 1.0694, "step": 7636 }, { "epoch": 2.602931152010907, "grad_norm": 13.0599868615492, "learning_rate": 5.251979307408589e-07, "loss": 1.4749, "step": 7637 }, { "epoch": 2.6032719836400817, "grad_norm": 11.395234467712974, "learning_rate": 5.243135411412665e-07, "loss": 1.4845, "step": 7638 }, { "epoch": 2.603612815269257, "grad_norm": 10.771842203211738, "learning_rate": 5.234298555849709e-07, "loss": 0.8352, "step": 7639 }, { "epoch": 2.6039536468984323, "grad_norm": 18.678632086138453, "learning_rate": 5.22546874210979e-07, "loss": 1.7726, "step": 7640 }, { "epoch": 2.604294478527607, "grad_norm": 16.53284863547154, "learning_rate": 5.21664597158188e-07, "loss": 0.9549, "step": 7641 }, { "epoch": 2.6046353101567825, "grad_norm": 13.379710825594456, "learning_rate": 5.207830245653827e-07, "loss": 1.4244, "step": 7642 }, { "epoch": 2.604976141785958, "grad_norm": 13.192893711602583, "learning_rate": 5.199021565712381e-07, "loss": 1.5713, "step": 7643 }, { "epoch": 2.6053169734151327, "grad_norm": 14.416678380847602, "learning_rate": 5.190219933143193e-07, "loss": 1.7034, "step": 7644 }, { "epoch": 2.605657805044308, "grad_norm": 18.743131930181526, "learning_rate": 5.181425349330776e-07, "loss": 1.4534, "step": 7645 }, { "epoch": 2.6059986366734833, "grad_norm": 15.085510731503705, "learning_rate": 5.172637815658583e-07, "loss": 1.4469, "step": 7646 }, { "epoch": 2.6063394683026586, "grad_norm": 14.241416708138514, "learning_rate": 5.163857333508888e-07, "loss": 1.4272, "step": 7647 }, { "epoch": 2.606680299931834, "grad_norm": 20.9475130667021, "learning_rate": 5.155083904262925e-07, "loss": 1.3887, "step": 7648 }, { "epoch": 2.607021131561009, "grad_norm": 9.65156605279421, "learning_rate": 5.146317529300787e-07, "loss": 1.3087, "step": 7649 }, { "epoch": 2.607361963190184, "grad_norm": 15.572629204454698, "learning_rate": 5.137558210001437e-07, "loss": 1.9056, "step": 7650 }, { "epoch": 2.6077027948193594, "grad_norm": 14.545494644640566, "learning_rate": 5.128805947742788e-07, "loss": 1.4606, "step": 7651 }, { "epoch": 2.6080436264485343, "grad_norm": 12.764362570214423, "learning_rate": 5.120060743901578e-07, "loss": 1.5002, "step": 7652 }, { "epoch": 2.6083844580777096, "grad_norm": 25.68624695452573, "learning_rate": 5.111322599853469e-07, "loss": 1.2751, "step": 7653 }, { "epoch": 2.608725289706885, "grad_norm": 18.782897951184097, "learning_rate": 5.102591516973005e-07, "loss": 1.2298, "step": 7654 }, { "epoch": 2.6090661213360598, "grad_norm": 18.376331997446762, "learning_rate": 5.09386749663362e-07, "loss": 1.5143, "step": 7655 }, { "epoch": 2.609406952965235, "grad_norm": 16.00591132101434, "learning_rate": 5.085150540207639e-07, "loss": 1.1487, "step": 7656 }, { "epoch": 2.6097477845944104, "grad_norm": 12.911772052321503, "learning_rate": 5.076440649066277e-07, "loss": 1.0996, "step": 7657 }, { "epoch": 2.6100886162235857, "grad_norm": 15.583305808746854, "learning_rate": 5.067737824579627e-07, "loss": 1.392, "step": 7658 }, { "epoch": 2.610429447852761, "grad_norm": 14.556019346639575, "learning_rate": 5.059042068116687e-07, "loss": 1.711, "step": 7659 }, { "epoch": 2.610770279481936, "grad_norm": 22.784794184067323, "learning_rate": 5.050353381045326e-07, "loss": 2.064, "step": 7660 }, { "epoch": 2.611111111111111, "grad_norm": 14.885913992818663, "learning_rate": 5.041671764732314e-07, "loss": 1.4587, "step": 7661 }, { "epoch": 2.6114519427402865, "grad_norm": 9.712231079307701, "learning_rate": 5.032997220543307e-07, "loss": 1.1508, "step": 7662 }, { "epoch": 2.6117927743694613, "grad_norm": 9.554327363543404, "learning_rate": 5.024329749842827e-07, "loss": 1.1248, "step": 7663 }, { "epoch": 2.6121336059986366, "grad_norm": 10.053131744416408, "learning_rate": 5.015669353994324e-07, "loss": 1.1432, "step": 7664 }, { "epoch": 2.612474437627812, "grad_norm": 14.611327011695462, "learning_rate": 5.007016034360113e-07, "loss": 1.3088, "step": 7665 }, { "epoch": 2.612815269256987, "grad_norm": 14.888229469918, "learning_rate": 4.998369792301366e-07, "loss": 1.7243, "step": 7666 }, { "epoch": 2.613156100886162, "grad_norm": 13.12397123450047, "learning_rate": 4.989730629178208e-07, "loss": 1.2319, "step": 7667 }, { "epoch": 2.6134969325153374, "grad_norm": 15.232165850450656, "learning_rate": 4.981098546349594e-07, "loss": 1.4397, "step": 7668 }, { "epoch": 2.6138377641445127, "grad_norm": 13.940008269027095, "learning_rate": 4.972473545173379e-07, "loss": 1.2343, "step": 7669 }, { "epoch": 2.614178595773688, "grad_norm": 18.99113390079362, "learning_rate": 4.963855627006331e-07, "loss": 1.1965, "step": 7670 }, { "epoch": 2.614519427402863, "grad_norm": 11.514383419468558, "learning_rate": 4.955244793204067e-07, "loss": 1.0754, "step": 7671 }, { "epoch": 2.6148602590320382, "grad_norm": 11.392428016814703, "learning_rate": 4.946641045121115e-07, "loss": 1.0203, "step": 7672 }, { "epoch": 2.6152010906612135, "grad_norm": 11.587284746616552, "learning_rate": 4.938044384110868e-07, "loss": 1.4241, "step": 7673 }, { "epoch": 2.6155419222903884, "grad_norm": 12.859058843385037, "learning_rate": 4.92945481152562e-07, "loss": 1.4092, "step": 7674 }, { "epoch": 2.6158827539195637, "grad_norm": 17.833400560899033, "learning_rate": 4.920872328716547e-07, "loss": 1.281, "step": 7675 }, { "epoch": 2.616223585548739, "grad_norm": 19.14000646312008, "learning_rate": 4.912296937033706e-07, "loss": 1.6787, "step": 7676 }, { "epoch": 2.616564417177914, "grad_norm": 12.036128791014066, "learning_rate": 4.903728637826038e-07, "loss": 1.3302, "step": 7677 }, { "epoch": 2.616905248807089, "grad_norm": 13.68930587287639, "learning_rate": 4.895167432441378e-07, "loss": 1.3586, "step": 7678 }, { "epoch": 2.6172460804362645, "grad_norm": 16.089817213294467, "learning_rate": 4.88661332222643e-07, "loss": 1.611, "step": 7679 }, { "epoch": 2.61758691206544, "grad_norm": 13.663495264062934, "learning_rate": 4.878066308526803e-07, "loss": 0.7885, "step": 7680 }, { "epoch": 2.617927743694615, "grad_norm": 36.899342208629406, "learning_rate": 4.869526392686946e-07, "loss": 1.1679, "step": 7681 }, { "epoch": 2.61826857532379, "grad_norm": 12.788538713030725, "learning_rate": 4.860993576050249e-07, "loss": 1.4739, "step": 7682 }, { "epoch": 2.6186094069529653, "grad_norm": 14.601751201818121, "learning_rate": 4.852467859958965e-07, "loss": 1.0906, "step": 7683 }, { "epoch": 2.6189502385821406, "grad_norm": 12.753006114731432, "learning_rate": 4.843949245754192e-07, "loss": 1.5752, "step": 7684 }, { "epoch": 2.6192910702113155, "grad_norm": 18.55938115571647, "learning_rate": 4.835437734775966e-07, "loss": 1.706, "step": 7685 }, { "epoch": 2.6196319018404908, "grad_norm": 15.076659195420685, "learning_rate": 4.82693332836317e-07, "loss": 1.7997, "step": 7686 }, { "epoch": 2.619972733469666, "grad_norm": 20.023439794461428, "learning_rate": 4.818436027853584e-07, "loss": 1.1161, "step": 7687 }, { "epoch": 2.620313565098841, "grad_norm": 14.260296081364507, "learning_rate": 4.809945834583873e-07, "loss": 1.0781, "step": 7688 }, { "epoch": 2.6206543967280163, "grad_norm": 12.233580707751281, "learning_rate": 4.801462749889568e-07, "loss": 1.0002, "step": 7689 }, { "epoch": 2.6209952283571916, "grad_norm": 13.589209495368374, "learning_rate": 4.792986775105102e-07, "loss": 1.4921, "step": 7690 }, { "epoch": 2.621336059986367, "grad_norm": 16.023154162511414, "learning_rate": 4.784517911563774e-07, "loss": 1.3418, "step": 7691 }, { "epoch": 2.621676891615542, "grad_norm": 15.52327331704132, "learning_rate": 4.776056160597769e-07, "loss": 1.2877, "step": 7692 }, { "epoch": 2.622017723244717, "grad_norm": 16.047076298300865, "learning_rate": 4.767601523538157e-07, "loss": 2.0197, "step": 7693 }, { "epoch": 2.6223585548738924, "grad_norm": 17.571215943529168, "learning_rate": 4.759154001714883e-07, "loss": 1.0487, "step": 7694 }, { "epoch": 2.6226993865030677, "grad_norm": 16.04375037655816, "learning_rate": 4.7507135964567795e-07, "loss": 1.634, "step": 7695 }, { "epoch": 2.6230402181322425, "grad_norm": 51.62853440939501, "learning_rate": 4.742280309091563e-07, "loss": 1.7843, "step": 7696 }, { "epoch": 2.623381049761418, "grad_norm": 11.578911506686502, "learning_rate": 4.733854140945798e-07, "loss": 1.1521, "step": 7697 }, { "epoch": 2.623721881390593, "grad_norm": 20.13857142351193, "learning_rate": 4.7254350933449764e-07, "loss": 1.5795, "step": 7698 }, { "epoch": 2.624062713019768, "grad_norm": 21.52270531774509, "learning_rate": 4.7170231676134546e-07, "loss": 1.1399, "step": 7699 }, { "epoch": 2.6244035446489433, "grad_norm": 13.44238971855142, "learning_rate": 4.70861836507443e-07, "loss": 1.4992, "step": 7700 }, { "epoch": 2.6247443762781186, "grad_norm": 16.980077529800855, "learning_rate": 4.70022068705005e-07, "loss": 0.9568, "step": 7701 }, { "epoch": 2.625085207907294, "grad_norm": 14.49006163371389, "learning_rate": 4.691830134861275e-07, "loss": 1.7436, "step": 7702 }, { "epoch": 2.6254260395364692, "grad_norm": 15.115783045182448, "learning_rate": 4.683446709827971e-07, "loss": 1.8825, "step": 7703 }, { "epoch": 2.625766871165644, "grad_norm": 14.6068027082302, "learning_rate": 4.675070413268917e-07, "loss": 1.4441, "step": 7704 }, { "epoch": 2.6261077027948194, "grad_norm": 15.518059222146553, "learning_rate": 4.666701246501698e-07, "loss": 1.4723, "step": 7705 }, { "epoch": 2.6264485344239947, "grad_norm": 12.298871315562094, "learning_rate": 4.658339210842844e-07, "loss": 1.2641, "step": 7706 }, { "epoch": 2.6267893660531696, "grad_norm": 13.995629812915185, "learning_rate": 4.6499843076077187e-07, "loss": 1.6411, "step": 7707 }, { "epoch": 2.627130197682345, "grad_norm": 15.050877844870971, "learning_rate": 4.641636538110594e-07, "loss": 1.5603, "step": 7708 }, { "epoch": 2.62747102931152, "grad_norm": 17.158471121418742, "learning_rate": 4.6332959036645975e-07, "loss": 1.4349, "step": 7709 }, { "epoch": 2.627811860940695, "grad_norm": 13.825309760861645, "learning_rate": 4.6249624055817564e-07, "loss": 0.994, "step": 7710 }, { "epoch": 2.6281526925698704, "grad_norm": 16.09532300929592, "learning_rate": 4.6166360451729564e-07, "loss": 1.4629, "step": 7711 }, { "epoch": 2.6284935241990457, "grad_norm": 15.650926629425596, "learning_rate": 4.6083168237479603e-07, "loss": 1.6271, "step": 7712 }, { "epoch": 2.628834355828221, "grad_norm": 15.283637381330518, "learning_rate": 4.600004742615427e-07, "loss": 1.3527, "step": 7713 }, { "epoch": 2.6291751874573963, "grad_norm": 15.25932191302661, "learning_rate": 4.591699803082872e-07, "loss": 1.1761, "step": 7714 }, { "epoch": 2.629516019086571, "grad_norm": 18.608322730171544, "learning_rate": 4.5834020064566997e-07, "loss": 1.7409, "step": 7715 }, { "epoch": 2.6298568507157465, "grad_norm": 16.808885830580586, "learning_rate": 4.5751113540421885e-07, "loss": 1.4939, "step": 7716 }, { "epoch": 2.630197682344922, "grad_norm": 14.039345793201036, "learning_rate": 4.56682784714349e-07, "loss": 1.1231, "step": 7717 }, { "epoch": 2.6305385139740967, "grad_norm": 12.786467094496615, "learning_rate": 4.5585514870636225e-07, "loss": 1.2704, "step": 7718 }, { "epoch": 2.630879345603272, "grad_norm": 16.694828217918154, "learning_rate": 4.550282275104495e-07, "loss": 1.4531, "step": 7719 }, { "epoch": 2.6312201772324473, "grad_norm": 11.508467338842266, "learning_rate": 4.5420202125669e-07, "loss": 1.7934, "step": 7720 }, { "epoch": 2.631561008861622, "grad_norm": 23.171279724577914, "learning_rate": 4.5337653007504814e-07, "loss": 1.7467, "step": 7721 }, { "epoch": 2.6319018404907975, "grad_norm": 29.499970643466533, "learning_rate": 4.5255175409537667e-07, "loss": 1.5627, "step": 7722 }, { "epoch": 2.6322426721199728, "grad_norm": 13.71483448524475, "learning_rate": 4.5172769344741686e-07, "loss": 1.5466, "step": 7723 }, { "epoch": 2.632583503749148, "grad_norm": 13.509711699035044, "learning_rate": 4.509043482607961e-07, "loss": 1.1888, "step": 7724 }, { "epoch": 2.6329243353783234, "grad_norm": 8.431942467158406, "learning_rate": 4.500817186650308e-07, "loss": 1.2252, "step": 7725 }, { "epoch": 2.6332651670074982, "grad_norm": 18.64211826246726, "learning_rate": 4.4925980478952303e-07, "loss": 1.449, "step": 7726 }, { "epoch": 2.6336059986366736, "grad_norm": 12.110938237420717, "learning_rate": 4.4843860676356376e-07, "loss": 0.9156, "step": 7727 }, { "epoch": 2.633946830265849, "grad_norm": 17.135542634957233, "learning_rate": 4.476181247163297e-07, "loss": 1.6148, "step": 7728 }, { "epoch": 2.6342876618950237, "grad_norm": 21.49570533243647, "learning_rate": 4.4679835877688706e-07, "loss": 1.1998, "step": 7729 }, { "epoch": 2.634628493524199, "grad_norm": 14.509506421986927, "learning_rate": 4.459793090741887e-07, "loss": 1.404, "step": 7730 }, { "epoch": 2.6349693251533743, "grad_norm": 15.991652792735968, "learning_rate": 4.4516097573707175e-07, "loss": 1.917, "step": 7731 }, { "epoch": 2.635310156782549, "grad_norm": 14.60052849195606, "learning_rate": 4.443433588942658e-07, "loss": 1.4789, "step": 7732 }, { "epoch": 2.6356509884117245, "grad_norm": 15.637998593313293, "learning_rate": 4.4352645867438594e-07, "loss": 0.7317, "step": 7733 }, { "epoch": 2.6359918200409, "grad_norm": 18.216432614139986, "learning_rate": 4.427102752059298e-07, "loss": 1.5986, "step": 7734 }, { "epoch": 2.636332651670075, "grad_norm": 17.1453145850343, "learning_rate": 4.4189480861729137e-07, "loss": 1.0067, "step": 7735 }, { "epoch": 2.6366734832992504, "grad_norm": 12.598979418878164, "learning_rate": 4.410800590367431e-07, "loss": 0.8576, "step": 7736 }, { "epoch": 2.6370143149284253, "grad_norm": 15.395934148449621, "learning_rate": 4.4026602659244856e-07, "loss": 1.6912, "step": 7737 }, { "epoch": 2.6373551465576006, "grad_norm": 14.633686757949757, "learning_rate": 4.394527114124614e-07, "loss": 1.7482, "step": 7738 }, { "epoch": 2.637695978186776, "grad_norm": 13.152639176432928, "learning_rate": 4.386401136247159e-07, "loss": 1.5672, "step": 7739 }, { "epoch": 2.638036809815951, "grad_norm": 12.985020453735148, "learning_rate": 4.3782823335703883e-07, "loss": 1.5336, "step": 7740 }, { "epoch": 2.638377641445126, "grad_norm": 16.638008359243816, "learning_rate": 4.3701707073714127e-07, "loss": 1.519, "step": 7741 }, { "epoch": 2.6387184730743014, "grad_norm": 12.525150719213373, "learning_rate": 4.3620662589262285e-07, "loss": 0.9935, "step": 7742 }, { "epoch": 2.6390593047034763, "grad_norm": 19.96362900125915, "learning_rate": 4.3539689895096936e-07, "loss": 1.2397, "step": 7743 }, { "epoch": 2.6394001363326516, "grad_norm": 17.465068023336027, "learning_rate": 4.3458789003955395e-07, "loss": 1.4404, "step": 7744 }, { "epoch": 2.639740967961827, "grad_norm": 17.24616384308173, "learning_rate": 4.337795992856381e-07, "loss": 1.345, "step": 7745 }, { "epoch": 2.640081799591002, "grad_norm": 16.333202135337864, "learning_rate": 4.3297202681636785e-07, "loss": 1.5025, "step": 7746 }, { "epoch": 2.6404226312201775, "grad_norm": 14.51267999763345, "learning_rate": 4.3216517275877777e-07, "loss": 1.5114, "step": 7747 }, { "epoch": 2.6407634628493524, "grad_norm": 16.04824115965645, "learning_rate": 4.3135903723979023e-07, "loss": 1.3824, "step": 7748 }, { "epoch": 2.6411042944785277, "grad_norm": 20.461962073131346, "learning_rate": 4.305536203862132e-07, "loss": 1.3215, "step": 7749 }, { "epoch": 2.641445126107703, "grad_norm": 11.330043259420776, "learning_rate": 4.297489223247403e-07, "loss": 0.954, "step": 7750 }, { "epoch": 2.641785957736878, "grad_norm": 20.67936990850048, "learning_rate": 4.2894494318195645e-07, "loss": 0.9849, "step": 7751 }, { "epoch": 2.642126789366053, "grad_norm": 10.46168015670137, "learning_rate": 4.281416830843288e-07, "loss": 0.9257, "step": 7752 }, { "epoch": 2.6424676209952285, "grad_norm": 10.221074681429823, "learning_rate": 4.273391421582124e-07, "loss": 0.9859, "step": 7753 }, { "epoch": 2.6428084526244033, "grad_norm": 13.053603268781615, "learning_rate": 4.2653732052985397e-07, "loss": 1.2832, "step": 7754 }, { "epoch": 2.6431492842535786, "grad_norm": 10.917827568348867, "learning_rate": 4.257362183253799e-07, "loss": 1.347, "step": 7755 }, { "epoch": 2.643490115882754, "grad_norm": 11.989655909652017, "learning_rate": 4.2493583567080777e-07, "loss": 1.3557, "step": 7756 }, { "epoch": 2.6438309475119293, "grad_norm": 13.81648315144623, "learning_rate": 4.2413617269204066e-07, "loss": 1.3846, "step": 7757 }, { "epoch": 2.644171779141104, "grad_norm": 27.28153799691946, "learning_rate": 4.2333722951486965e-07, "loss": 0.7134, "step": 7758 }, { "epoch": 2.6445126107702794, "grad_norm": 20.072140412242078, "learning_rate": 4.2253900626497093e-07, "loss": 1.3419, "step": 7759 }, { "epoch": 2.6448534423994547, "grad_norm": 16.57675300469527, "learning_rate": 4.2174150306790786e-07, "loss": 1.7682, "step": 7760 }, { "epoch": 2.64519427402863, "grad_norm": 21.105021708550275, "learning_rate": 4.209447200491318e-07, "loss": 1.6267, "step": 7761 }, { "epoch": 2.645535105657805, "grad_norm": 19.328896153712027, "learning_rate": 4.201486573339791e-07, "loss": 1.351, "step": 7762 }, { "epoch": 2.6458759372869802, "grad_norm": 14.717174975104685, "learning_rate": 4.193533150476742e-07, "loss": 1.5083, "step": 7763 }, { "epoch": 2.6462167689161555, "grad_norm": 17.186452842631493, "learning_rate": 4.185586933153285e-07, "loss": 1.9581, "step": 7764 }, { "epoch": 2.6465576005453304, "grad_norm": 14.262743758511634, "learning_rate": 4.1776479226193543e-07, "loss": 1.3581, "step": 7765 }, { "epoch": 2.6468984321745057, "grad_norm": 16.236783291800926, "learning_rate": 4.169716120123829e-07, "loss": 1.5944, "step": 7766 }, { "epoch": 2.647239263803681, "grad_norm": 16.559460227348495, "learning_rate": 4.1617915269144047e-07, "loss": 1.5905, "step": 7767 }, { "epoch": 2.6475800954328563, "grad_norm": 15.907168742272804, "learning_rate": 4.1538741442376297e-07, "loss": 1.9036, "step": 7768 }, { "epoch": 2.647920927062031, "grad_norm": 16.328384364697417, "learning_rate": 4.1459639733389676e-07, "loss": 1.7929, "step": 7769 }, { "epoch": 2.6482617586912065, "grad_norm": 17.94846134149216, "learning_rate": 4.138061015462702e-07, "loss": 1.464, "step": 7770 }, { "epoch": 2.648602590320382, "grad_norm": 14.183304025461293, "learning_rate": 4.1301652718519934e-07, "loss": 1.1229, "step": 7771 }, { "epoch": 2.648943421949557, "grad_norm": 18.297017736508003, "learning_rate": 4.1222767437488986e-07, "loss": 1.2185, "step": 7772 }, { "epoch": 2.649284253578732, "grad_norm": 46.35249206972199, "learning_rate": 4.114395432394297e-07, "loss": 1.5993, "step": 7773 }, { "epoch": 2.6496250852079073, "grad_norm": 15.417262165983074, "learning_rate": 4.1065213390279524e-07, "loss": 1.26, "step": 7774 }, { "epoch": 2.6499659168370826, "grad_norm": 14.684580946569552, "learning_rate": 4.098654464888496e-07, "loss": 1.9025, "step": 7775 }, { "epoch": 2.6503067484662575, "grad_norm": 15.708029001238208, "learning_rate": 4.090794811213411e-07, "loss": 1.7399, "step": 7776 }, { "epoch": 2.6506475800954328, "grad_norm": 22.032778170819377, "learning_rate": 4.0829423792390577e-07, "loss": 0.7559, "step": 7777 }, { "epoch": 2.650988411724608, "grad_norm": 26.869847806976644, "learning_rate": 4.0750971702006537e-07, "loss": 1.4664, "step": 7778 }, { "epoch": 2.6513292433537834, "grad_norm": 19.109611233766728, "learning_rate": 4.0672591853322784e-07, "loss": 2.1366, "step": 7779 }, { "epoch": 2.6516700749829583, "grad_norm": 13.40718289200999, "learning_rate": 4.0594284258668846e-07, "loss": 1.6611, "step": 7780 }, { "epoch": 2.6520109066121336, "grad_norm": 16.422820364696385, "learning_rate": 4.051604893036276e-07, "loss": 1.5282, "step": 7781 }, { "epoch": 2.652351738241309, "grad_norm": 18.930119501659625, "learning_rate": 4.0437885880711293e-07, "loss": 2.0103, "step": 7782 }, { "epoch": 2.652692569870484, "grad_norm": 22.682415420500924, "learning_rate": 4.0359795122009825e-07, "loss": 1.2153, "step": 7783 }, { "epoch": 2.653033401499659, "grad_norm": 11.140848012062857, "learning_rate": 4.0281776666542204e-07, "loss": 1.3739, "step": 7784 }, { "epoch": 2.6533742331288344, "grad_norm": 26.024361997643695, "learning_rate": 4.0203830526581277e-07, "loss": 1.6619, "step": 7785 }, { "epoch": 2.6537150647580097, "grad_norm": 13.725842586865713, "learning_rate": 4.012595671438807e-07, "loss": 1.6374, "step": 7786 }, { "epoch": 2.6540558963871845, "grad_norm": 20.365610856290534, "learning_rate": 4.004815524221245e-07, "loss": 1.5722, "step": 7787 }, { "epoch": 2.65439672801636, "grad_norm": 17.8274138520206, "learning_rate": 3.9970426122293125e-07, "loss": 1.9641, "step": 7788 }, { "epoch": 2.654737559645535, "grad_norm": 23.74683616599662, "learning_rate": 3.9892769366856986e-07, "loss": 1.1312, "step": 7789 }, { "epoch": 2.6550783912747105, "grad_norm": 13.613434100441507, "learning_rate": 3.9815184988119817e-07, "loss": 1.5715, "step": 7790 }, { "epoch": 2.6554192229038853, "grad_norm": 14.645207458140527, "learning_rate": 3.9737672998285904e-07, "loss": 1.4185, "step": 7791 }, { "epoch": 2.6557600545330606, "grad_norm": 15.980630532727378, "learning_rate": 3.966023340954822e-07, "loss": 1.7762, "step": 7792 }, { "epoch": 2.656100886162236, "grad_norm": 9.367205504333942, "learning_rate": 3.958286623408836e-07, "loss": 1.4074, "step": 7793 }, { "epoch": 2.6564417177914113, "grad_norm": 19.113259508477366, "learning_rate": 3.950557148407641e-07, "loss": 1.6684, "step": 7794 }, { "epoch": 2.656782549420586, "grad_norm": 17.52479240233182, "learning_rate": 3.942834917167121e-07, "loss": 1.1084, "step": 7795 }, { "epoch": 2.6571233810497614, "grad_norm": 12.180743250538848, "learning_rate": 3.935119930902015e-07, "loss": 0.9065, "step": 7796 }, { "epoch": 2.6574642126789367, "grad_norm": 41.32326177623418, "learning_rate": 3.927412190825913e-07, "loss": 1.128, "step": 7797 }, { "epoch": 2.6578050443081116, "grad_norm": 8.903573193658897, "learning_rate": 3.9197116981512907e-07, "loss": 1.1785, "step": 7798 }, { "epoch": 2.658145875937287, "grad_norm": 8.798601425975658, "learning_rate": 3.9120184540894336e-07, "loss": 0.9837, "step": 7799 }, { "epoch": 2.658486707566462, "grad_norm": 18.069434661624744, "learning_rate": 3.904332459850546e-07, "loss": 1.6623, "step": 7800 }, { "epoch": 2.658827539195637, "grad_norm": 19.07043219490256, "learning_rate": 3.896653716643667e-07, "loss": 1.5415, "step": 7801 }, { "epoch": 2.6591683708248124, "grad_norm": 13.351870086632644, "learning_rate": 3.888982225676669e-07, "loss": 1.4736, "step": 7802 }, { "epoch": 2.6595092024539877, "grad_norm": 18.545711315894586, "learning_rate": 3.881317988156336e-07, "loss": 1.5606, "step": 7803 }, { "epoch": 2.659850034083163, "grad_norm": 13.533396922778447, "learning_rate": 3.873661005288276e-07, "loss": 1.2747, "step": 7804 }, { "epoch": 2.6601908657123383, "grad_norm": 21.70281722198345, "learning_rate": 3.8660112782769433e-07, "loss": 1.2614, "step": 7805 }, { "epoch": 2.660531697341513, "grad_norm": 17.938850830498293, "learning_rate": 3.858368808325702e-07, "loss": 2.0465, "step": 7806 }, { "epoch": 2.6608725289706885, "grad_norm": 15.635854036135262, "learning_rate": 3.8507335966367186e-07, "loss": 1.4135, "step": 7807 }, { "epoch": 2.661213360599864, "grad_norm": 18.569405228942006, "learning_rate": 3.843105644411038e-07, "loss": 2.2657, "step": 7808 }, { "epoch": 2.6615541922290387, "grad_norm": 18.404488904246644, "learning_rate": 3.8354849528486005e-07, "loss": 1.8543, "step": 7809 }, { "epoch": 2.661895023858214, "grad_norm": 15.229405513604094, "learning_rate": 3.8278715231481414e-07, "loss": 1.5476, "step": 7810 }, { "epoch": 2.6622358554873893, "grad_norm": 14.319818486101033, "learning_rate": 3.820265356507291e-07, "loss": 1.4331, "step": 7811 }, { "epoch": 2.662576687116564, "grad_norm": 12.738526045985424, "learning_rate": 3.8126664541225324e-07, "loss": 1.4386, "step": 7812 }, { "epoch": 2.6629175187457395, "grad_norm": 13.107749889435853, "learning_rate": 3.8050748171892025e-07, "loss": 1.2599, "step": 7813 }, { "epoch": 2.6632583503749148, "grad_norm": 12.383125411684835, "learning_rate": 3.797490446901497e-07, "loss": 1.5627, "step": 7814 }, { "epoch": 2.66359918200409, "grad_norm": 19.99085334021286, "learning_rate": 3.789913344452456e-07, "loss": 1.2343, "step": 7815 }, { "epoch": 2.6639400136332654, "grad_norm": 14.00483702119686, "learning_rate": 3.7823435110340036e-07, "loss": 1.2823, "step": 7816 }, { "epoch": 2.6642808452624402, "grad_norm": 16.681705190051034, "learning_rate": 3.774780947836909e-07, "loss": 1.5107, "step": 7817 }, { "epoch": 2.6646216768916156, "grad_norm": 11.261882973420477, "learning_rate": 3.76722565605076e-07, "loss": 1.3255, "step": 7818 }, { "epoch": 2.664962508520791, "grad_norm": 10.590859641799282, "learning_rate": 3.7596776368640843e-07, "loss": 1.3015, "step": 7819 }, { "epoch": 2.6653033401499657, "grad_norm": 28.145325359830053, "learning_rate": 3.75213689146417e-07, "loss": 1.5126, "step": 7820 }, { "epoch": 2.665644171779141, "grad_norm": 24.691701265521274, "learning_rate": 3.744603421037224e-07, "loss": 1.5785, "step": 7821 }, { "epoch": 2.6659850034083163, "grad_norm": 20.44950377751187, "learning_rate": 3.7370772267683044e-07, "loss": 1.0925, "step": 7822 }, { "epoch": 2.666325835037491, "grad_norm": 13.235536036525458, "learning_rate": 3.7295583098412913e-07, "loss": 1.3877, "step": 7823 }, { "epoch": 2.6666666666666665, "grad_norm": 12.359995018635434, "learning_rate": 3.72204667143895e-07, "loss": 1.1274, "step": 7824 }, { "epoch": 2.667007498295842, "grad_norm": 10.275599584228384, "learning_rate": 3.714542312742886e-07, "loss": 0.8469, "step": 7825 }, { "epoch": 2.667348329925017, "grad_norm": 16.022969414278013, "learning_rate": 3.7070452349335704e-07, "loss": 1.3223, "step": 7826 }, { "epoch": 2.6676891615541924, "grad_norm": 16.03249179843436, "learning_rate": 3.699555439190322e-07, "loss": 1.8954, "step": 7827 }, { "epoch": 2.6680299931833673, "grad_norm": 17.429179154201318, "learning_rate": 3.692072926691309e-07, "loss": 1.9761, "step": 7828 }, { "epoch": 2.6683708248125426, "grad_norm": 18.63018811027642, "learning_rate": 3.6845976986135735e-07, "loss": 0.9547, "step": 7829 }, { "epoch": 2.668711656441718, "grad_norm": 25.03063135990437, "learning_rate": 3.6771297561329854e-07, "loss": 1.0864, "step": 7830 }, { "epoch": 2.669052488070893, "grad_norm": 9.91755067254692, "learning_rate": 3.669669100424289e-07, "loss": 1.3366, "step": 7831 }, { "epoch": 2.669393319700068, "grad_norm": 15.160286899973935, "learning_rate": 3.662215732661073e-07, "loss": 1.3539, "step": 7832 }, { "epoch": 2.6697341513292434, "grad_norm": 9.742803357653077, "learning_rate": 3.654769654015783e-07, "loss": 1.0024, "step": 7833 }, { "epoch": 2.6700749829584183, "grad_norm": 25.31883700598765, "learning_rate": 3.647330865659715e-07, "loss": 1.0595, "step": 7834 }, { "epoch": 2.6704158145875936, "grad_norm": 19.988728134099347, "learning_rate": 3.639899368763028e-07, "loss": 1.1669, "step": 7835 }, { "epoch": 2.670756646216769, "grad_norm": 11.081578133819924, "learning_rate": 3.6324751644947085e-07, "loss": 1.2867, "step": 7836 }, { "epoch": 2.671097477845944, "grad_norm": 15.329988560235401, "learning_rate": 3.6250582540226286e-07, "loss": 1.4313, "step": 7837 }, { "epoch": 2.6714383094751195, "grad_norm": 16.46541053977173, "learning_rate": 3.617648638513499e-07, "loss": 1.8117, "step": 7838 }, { "epoch": 2.6717791411042944, "grad_norm": 21.795895582666517, "learning_rate": 3.610246319132865e-07, "loss": 1.7401, "step": 7839 }, { "epoch": 2.6721199727334697, "grad_norm": 20.201877888176863, "learning_rate": 3.6028512970451623e-07, "loss": 1.8716, "step": 7840 }, { "epoch": 2.672460804362645, "grad_norm": 19.298641304048488, "learning_rate": 3.595463573413643e-07, "loss": 1.0439, "step": 7841 }, { "epoch": 2.67280163599182, "grad_norm": 23.134455226320387, "learning_rate": 3.588083149400423e-07, "loss": 1.4072, "step": 7842 }, { "epoch": 2.673142467620995, "grad_norm": 18.017739350635498, "learning_rate": 3.5807100261664894e-07, "loss": 0.557, "step": 7843 }, { "epoch": 2.6734832992501705, "grad_norm": 15.374244083213435, "learning_rate": 3.5733442048716484e-07, "loss": 1.637, "step": 7844 }, { "epoch": 2.6738241308793453, "grad_norm": 14.201387814855059, "learning_rate": 3.565985686674578e-07, "loss": 1.7156, "step": 7845 }, { "epoch": 2.6741649625085206, "grad_norm": 11.24183847801657, "learning_rate": 3.5586344727328027e-07, "loss": 0.9711, "step": 7846 }, { "epoch": 2.674505794137696, "grad_norm": 13.860981074638048, "learning_rate": 3.5512905642026974e-07, "loss": 1.9548, "step": 7847 }, { "epoch": 2.6748466257668713, "grad_norm": 20.671953982241124, "learning_rate": 3.5439539622394934e-07, "loss": 1.0235, "step": 7848 }, { "epoch": 2.6751874573960466, "grad_norm": 10.19923186322359, "learning_rate": 3.536624667997246e-07, "loss": 1.2436, "step": 7849 }, { "epoch": 2.6755282890252214, "grad_norm": 15.353681050362521, "learning_rate": 3.5293026826289043e-07, "loss": 1.6182, "step": 7850 }, { "epoch": 2.6758691206543967, "grad_norm": 15.05699545961098, "learning_rate": 3.5219880072862534e-07, "loss": 1.5452, "step": 7851 }, { "epoch": 2.676209952283572, "grad_norm": 12.31068488350357, "learning_rate": 3.514680643119889e-07, "loss": 1.5833, "step": 7852 }, { "epoch": 2.676550783912747, "grad_norm": 9.697543257832464, "learning_rate": 3.5073805912793244e-07, "loss": 0.951, "step": 7853 }, { "epoch": 2.6768916155419222, "grad_norm": 15.39533400541716, "learning_rate": 3.500087852912859e-07, "loss": 1.0527, "step": 7854 }, { "epoch": 2.6772324471710975, "grad_norm": 15.053015019663684, "learning_rate": 3.492802429167669e-07, "loss": 1.8663, "step": 7855 }, { "epoch": 2.6775732788002724, "grad_norm": 16.293585749483363, "learning_rate": 3.4855243211898146e-07, "loss": 1.6889, "step": 7856 }, { "epoch": 2.6779141104294477, "grad_norm": 15.373781206926635, "learning_rate": 3.4782535301241315e-07, "loss": 1.6508, "step": 7857 }, { "epoch": 2.678254942058623, "grad_norm": 15.923568925037225, "learning_rate": 3.470990057114365e-07, "loss": 1.2919, "step": 7858 }, { "epoch": 2.6785957736877983, "grad_norm": 16.262113744569717, "learning_rate": 3.463733903303085e-07, "loss": 1.1861, "step": 7859 }, { "epoch": 2.6789366053169736, "grad_norm": 14.747995633682201, "learning_rate": 3.456485069831711e-07, "loss": 1.3213, "step": 7860 }, { "epoch": 2.6792774369461485, "grad_norm": 13.601744151893882, "learning_rate": 3.4492435578405146e-07, "loss": 0.8925, "step": 7861 }, { "epoch": 2.679618268575324, "grad_norm": 16.22083915076897, "learning_rate": 3.442009368468613e-07, "loss": 1.5687, "step": 7862 }, { "epoch": 2.679959100204499, "grad_norm": 15.617567296264024, "learning_rate": 3.434782502853978e-07, "loss": 1.6859, "step": 7863 }, { "epoch": 2.680299931833674, "grad_norm": 26.838900136746254, "learning_rate": 3.427562962133418e-07, "loss": 1.8461, "step": 7864 }, { "epoch": 2.6806407634628493, "grad_norm": 14.214350050477996, "learning_rate": 3.4203507474425967e-07, "loss": 1.7913, "step": 7865 }, { "epoch": 2.6809815950920246, "grad_norm": 14.90565586369843, "learning_rate": 3.413145859916028e-07, "loss": 1.3606, "step": 7866 }, { "epoch": 2.6813224267211995, "grad_norm": 30.978719757063537, "learning_rate": 3.4059483006870677e-07, "loss": 1.6558, "step": 7867 }, { "epoch": 2.681663258350375, "grad_norm": 12.630233057229475, "learning_rate": 3.3987580708879143e-07, "loss": 1.5652, "step": 7868 }, { "epoch": 2.68200408997955, "grad_norm": 16.709220674371704, "learning_rate": 3.3915751716496413e-07, "loss": 1.3903, "step": 7869 }, { "epoch": 2.6823449216087254, "grad_norm": 13.243827878807327, "learning_rate": 3.3843996041021066e-07, "loss": 1.7876, "step": 7870 }, { "epoch": 2.6826857532379007, "grad_norm": 17.012702660471586, "learning_rate": 3.3772313693740954e-07, "loss": 1.118, "step": 7871 }, { "epoch": 2.6830265848670756, "grad_norm": 13.706615296183385, "learning_rate": 3.3700704685931895e-07, "loss": 1.4805, "step": 7872 }, { "epoch": 2.683367416496251, "grad_norm": 17.610271940098563, "learning_rate": 3.3629169028858046e-07, "loss": 1.171, "step": 7873 }, { "epoch": 2.683708248125426, "grad_norm": 9.984825469416746, "learning_rate": 3.355770673377262e-07, "loss": 1.0858, "step": 7874 }, { "epoch": 2.684049079754601, "grad_norm": 10.83568414876771, "learning_rate": 3.348631781191664e-07, "loss": 0.9809, "step": 7875 }, { "epoch": 2.6843899113837764, "grad_norm": 21.32135702335243, "learning_rate": 3.3415002274519824e-07, "loss": 1.3589, "step": 7876 }, { "epoch": 2.6847307430129517, "grad_norm": 14.380075492172123, "learning_rate": 3.334376013280072e-07, "loss": 1.4442, "step": 7877 }, { "epoch": 2.6850715746421265, "grad_norm": 12.693135501946157, "learning_rate": 3.327259139796568e-07, "loss": 1.1404, "step": 7878 }, { "epoch": 2.685412406271302, "grad_norm": 22.036208220965033, "learning_rate": 3.3201496081209984e-07, "loss": 1.2565, "step": 7879 }, { "epoch": 2.685753237900477, "grad_norm": 25.901666855532397, "learning_rate": 3.3130474193717176e-07, "loss": 1.1245, "step": 7880 }, { "epoch": 2.6860940695296525, "grad_norm": 18.67299819235336, "learning_rate": 3.305952574665927e-07, "loss": 1.892, "step": 7881 }, { "epoch": 2.6864349011588278, "grad_norm": 22.957280924843023, "learning_rate": 3.298865075119684e-07, "loss": 1.6049, "step": 7882 }, { "epoch": 2.6867757327880026, "grad_norm": 9.93875014266664, "learning_rate": 3.2917849218478513e-07, "loss": 0.7872, "step": 7883 }, { "epoch": 2.687116564417178, "grad_norm": 13.153510219454484, "learning_rate": 3.284712115964195e-07, "loss": 1.2284, "step": 7884 }, { "epoch": 2.6874573960463533, "grad_norm": 13.998178577135569, "learning_rate": 3.27764665858129e-07, "loss": 1.0878, "step": 7885 }, { "epoch": 2.687798227675528, "grad_norm": 33.372843210446796, "learning_rate": 3.270588550810544e-07, "loss": 1.5453, "step": 7886 }, { "epoch": 2.6881390593047034, "grad_norm": 11.695270833569362, "learning_rate": 3.2635377937622557e-07, "loss": 1.2095, "step": 7887 }, { "epoch": 2.6884798909338787, "grad_norm": 15.081144001207846, "learning_rate": 3.2564943885455114e-07, "loss": 0.9385, "step": 7888 }, { "epoch": 2.6888207225630536, "grad_norm": 12.97981022892759, "learning_rate": 3.249458336268263e-07, "loss": 1.2923, "step": 7889 }, { "epoch": 2.689161554192229, "grad_norm": 12.271765563489652, "learning_rate": 3.242429638037342e-07, "loss": 1.1517, "step": 7890 }, { "epoch": 2.689502385821404, "grad_norm": 16.043021741690577, "learning_rate": 3.2354082949583587e-07, "loss": 1.4855, "step": 7891 }, { "epoch": 2.6898432174505795, "grad_norm": 10.777005923528108, "learning_rate": 3.228394308135813e-07, "loss": 0.8444, "step": 7892 }, { "epoch": 2.690184049079755, "grad_norm": 13.144777943409839, "learning_rate": 3.221387678673027e-07, "loss": 1.2258, "step": 7893 }, { "epoch": 2.6905248807089297, "grad_norm": 23.698076661797646, "learning_rate": 3.2143884076721753e-07, "loss": 1.407, "step": 7894 }, { "epoch": 2.690865712338105, "grad_norm": 12.27075959737697, "learning_rate": 3.207396496234266e-07, "loss": 0.7508, "step": 7895 }, { "epoch": 2.6912065439672803, "grad_norm": 14.639541514138164, "learning_rate": 3.2004119454591643e-07, "loss": 1.44, "step": 7896 }, { "epoch": 2.691547375596455, "grad_norm": 14.625336550304194, "learning_rate": 3.193434756445557e-07, "loss": 1.3885, "step": 7897 }, { "epoch": 2.6918882072256305, "grad_norm": 11.18334700566323, "learning_rate": 3.186464930290989e-07, "loss": 1.0583, "step": 7898 }, { "epoch": 2.692229038854806, "grad_norm": 9.413567707880546, "learning_rate": 3.179502468091844e-07, "loss": 1.1087, "step": 7899 }, { "epoch": 2.6925698704839807, "grad_norm": 11.717920562710704, "learning_rate": 3.1725473709433405e-07, "loss": 1.2788, "step": 7900 }, { "epoch": 2.692910702113156, "grad_norm": 49.289291540221626, "learning_rate": 3.1655996399395483e-07, "loss": 1.4423, "step": 7901 }, { "epoch": 2.6932515337423313, "grad_norm": 13.531759992399126, "learning_rate": 3.158659276173365e-07, "loss": 1.2649, "step": 7902 }, { "epoch": 2.6935923653715066, "grad_norm": 34.61880627059901, "learning_rate": 3.1517262807365554e-07, "loss": 1.4064, "step": 7903 }, { "epoch": 2.693933197000682, "grad_norm": 18.16156681330489, "learning_rate": 3.144800654719676e-07, "loss": 1.8184, "step": 7904 }, { "epoch": 2.6942740286298568, "grad_norm": 18.31046686179254, "learning_rate": 3.1378823992121776e-07, "loss": 1.5392, "step": 7905 }, { "epoch": 2.694614860259032, "grad_norm": 29.328940182679954, "learning_rate": 3.130971515302339e-07, "loss": 1.3962, "step": 7906 }, { "epoch": 2.6949556918882074, "grad_norm": 20.036750014921676, "learning_rate": 3.124068004077235e-07, "loss": 1.1446, "step": 7907 }, { "epoch": 2.6952965235173822, "grad_norm": 12.672395051674501, "learning_rate": 3.117171866622853e-07, "loss": 1.3195, "step": 7908 }, { "epoch": 2.6956373551465576, "grad_norm": 17.197056183392977, "learning_rate": 3.110283104023959e-07, "loss": 1.3408, "step": 7909 }, { "epoch": 2.695978186775733, "grad_norm": 13.148957632326098, "learning_rate": 3.103401717364185e-07, "loss": 1.314, "step": 7910 }, { "epoch": 2.6963190184049077, "grad_norm": 14.401642031317264, "learning_rate": 3.096527707726016e-07, "loss": 1.2919, "step": 7911 }, { "epoch": 2.696659850034083, "grad_norm": 23.791054390101714, "learning_rate": 3.0896610761907474e-07, "loss": 1.4195, "step": 7912 }, { "epoch": 2.6970006816632583, "grad_norm": 21.685901644878495, "learning_rate": 3.082801823838527e-07, "loss": 1.0859, "step": 7913 }, { "epoch": 2.6973415132924337, "grad_norm": 16.67645659565729, "learning_rate": 3.075949951748347e-07, "loss": 1.7195, "step": 7914 }, { "epoch": 2.697682344921609, "grad_norm": 11.198207929606362, "learning_rate": 3.069105460998034e-07, "loss": 1.0759, "step": 7915 }, { "epoch": 2.698023176550784, "grad_norm": 13.747794339084923, "learning_rate": 3.062268352664255e-07, "loss": 1.5475, "step": 7916 }, { "epoch": 2.698364008179959, "grad_norm": 22.208438034093966, "learning_rate": 3.05543862782251e-07, "loss": 1.6429, "step": 7917 }, { "epoch": 2.6987048398091344, "grad_norm": 20.617760878037288, "learning_rate": 3.048616287547146e-07, "loss": 1.9196, "step": 7918 }, { "epoch": 2.6990456714383093, "grad_norm": 15.862016538980958, "learning_rate": 3.0418013329113483e-07, "loss": 1.3976, "step": 7919 }, { "epoch": 2.6993865030674846, "grad_norm": 16.443246158018482, "learning_rate": 3.034993764987115e-07, "loss": 1.4784, "step": 7920 }, { "epoch": 2.69972733469666, "grad_norm": 17.972093647965423, "learning_rate": 3.028193584845329e-07, "loss": 2.0999, "step": 7921 }, { "epoch": 2.700068166325835, "grad_norm": 8.330028479386932, "learning_rate": 3.0214007935556844e-07, "loss": 1.0816, "step": 7922 }, { "epoch": 2.70040899795501, "grad_norm": 17.025962566902162, "learning_rate": 3.0146153921866873e-07, "loss": 1.4721, "step": 7923 }, { "epoch": 2.7007498295841854, "grad_norm": 27.543883639883468, "learning_rate": 3.007837381805745e-07, "loss": 1.1798, "step": 7924 }, { "epoch": 2.7010906612133607, "grad_norm": 20.45026199532222, "learning_rate": 3.001066763479038e-07, "loss": 1.4235, "step": 7925 }, { "epoch": 2.701431492842536, "grad_norm": 13.460894753004986, "learning_rate": 2.9943035382716143e-07, "loss": 1.6709, "step": 7926 }, { "epoch": 2.701772324471711, "grad_norm": 16.02992666212317, "learning_rate": 2.987547707247379e-07, "loss": 1.6812, "step": 7927 }, { "epoch": 2.702113156100886, "grad_norm": 21.604582800458903, "learning_rate": 2.980799271469026e-07, "loss": 0.7154, "step": 7928 }, { "epoch": 2.7024539877300615, "grad_norm": 17.941121156708135, "learning_rate": 2.9740582319981227e-07, "loss": 1.6154, "step": 7929 }, { "epoch": 2.7027948193592364, "grad_norm": 18.675835699468696, "learning_rate": 2.9673245898950543e-07, "loss": 1.7565, "step": 7930 }, { "epoch": 2.7031356509884117, "grad_norm": 17.76911188752681, "learning_rate": 2.9605983462190567e-07, "loss": 1.6894, "step": 7931 }, { "epoch": 2.703476482617587, "grad_norm": 15.041053693535646, "learning_rate": 2.953879502028195e-07, "loss": 0.9798, "step": 7932 }, { "epoch": 2.703817314246762, "grad_norm": 19.091477130465055, "learning_rate": 2.947168058379363e-07, "loss": 2.2291, "step": 7933 }, { "epoch": 2.704158145875937, "grad_norm": 16.444759844741558, "learning_rate": 2.9404640163283037e-07, "loss": 1.8707, "step": 7934 }, { "epoch": 2.7044989775051125, "grad_norm": 13.226558385509746, "learning_rate": 2.9337673769295916e-07, "loss": 0.7609, "step": 7935 }, { "epoch": 2.704839809134288, "grad_norm": 13.554132798897864, "learning_rate": 2.927078141236628e-07, "loss": 1.3824, "step": 7936 }, { "epoch": 2.705180640763463, "grad_norm": 15.31613950929831, "learning_rate": 2.9203963103016655e-07, "loss": 1.5386, "step": 7937 }, { "epoch": 2.705521472392638, "grad_norm": 11.384348445800972, "learning_rate": 2.913721885175763e-07, "loss": 1.048, "step": 7938 }, { "epoch": 2.7058623040218133, "grad_norm": 19.080066690077146, "learning_rate": 2.907054866908854e-07, "loss": 1.7981, "step": 7939 }, { "epoch": 2.7062031356509886, "grad_norm": 21.55670071652056, "learning_rate": 2.9003952565496875e-07, "loss": 1.2614, "step": 7940 }, { "epoch": 2.7065439672801634, "grad_norm": 11.001998137699625, "learning_rate": 2.893743055145826e-07, "loss": 1.1502, "step": 7941 }, { "epoch": 2.7068847989093388, "grad_norm": 15.000966879405219, "learning_rate": 2.887098263743715e-07, "loss": 1.2024, "step": 7942 }, { "epoch": 2.707225630538514, "grad_norm": 15.273645974282932, "learning_rate": 2.880460883388586e-07, "loss": 1.2917, "step": 7943 }, { "epoch": 2.707566462167689, "grad_norm": 20.660426498154, "learning_rate": 2.8738309151245315e-07, "loss": 1.7341, "step": 7944 }, { "epoch": 2.7079072937968642, "grad_norm": 19.556665131861717, "learning_rate": 2.8672083599944724e-07, "loss": 1.8222, "step": 7945 }, { "epoch": 2.7082481254260395, "grad_norm": 13.659075378376922, "learning_rate": 2.8605932190401643e-07, "loss": 0.8071, "step": 7946 }, { "epoch": 2.708588957055215, "grad_norm": 197.61682057999684, "learning_rate": 2.8539854933021916e-07, "loss": 1.4817, "step": 7947 }, { "epoch": 2.70892978868439, "grad_norm": 17.5158688269508, "learning_rate": 2.847385183819984e-07, "loss": 1.038, "step": 7948 }, { "epoch": 2.709270620313565, "grad_norm": 13.580501244059793, "learning_rate": 2.840792291631783e-07, "loss": 1.5004, "step": 7949 }, { "epoch": 2.7096114519427403, "grad_norm": 14.597328595328262, "learning_rate": 2.8342068177746874e-07, "loss": 1.3535, "step": 7950 }, { "epoch": 2.7099522835719156, "grad_norm": 18.293684932746842, "learning_rate": 2.827628763284618e-07, "loss": 1.4249, "step": 7951 }, { "epoch": 2.7102931152010905, "grad_norm": 14.248178669128482, "learning_rate": 2.8210581291963244e-07, "loss": 1.8212, "step": 7952 }, { "epoch": 2.710633946830266, "grad_norm": 15.36798637784822, "learning_rate": 2.8144949165434034e-07, "loss": 1.2004, "step": 7953 }, { "epoch": 2.710974778459441, "grad_norm": 8.547773084224296, "learning_rate": 2.80793912635825e-07, "loss": 0.8923, "step": 7954 }, { "epoch": 2.711315610088616, "grad_norm": 28.013229893766844, "learning_rate": 2.801390759672146e-07, "loss": 1.6774, "step": 7955 }, { "epoch": 2.7116564417177913, "grad_norm": 12.748486342859875, "learning_rate": 2.794849817515166e-07, "loss": 1.4802, "step": 7956 }, { "epoch": 2.7119972733469666, "grad_norm": 19.36399524164748, "learning_rate": 2.788316300916205e-07, "loss": 2.0698, "step": 7957 }, { "epoch": 2.712338104976142, "grad_norm": 13.754776991418014, "learning_rate": 2.781790210903046e-07, "loss": 1.5114, "step": 7958 }, { "epoch": 2.7126789366053172, "grad_norm": 24.89201462627269, "learning_rate": 2.7752715485022454e-07, "loss": 1.2071, "step": 7959 }, { "epoch": 2.713019768234492, "grad_norm": 15.878326604973246, "learning_rate": 2.7687603147392105e-07, "loss": 1.5331, "step": 7960 }, { "epoch": 2.7133605998636674, "grad_norm": 9.216239558475298, "learning_rate": 2.7622565106382116e-07, "loss": 0.987, "step": 7961 }, { "epoch": 2.7137014314928427, "grad_norm": 17.055067880531148, "learning_rate": 2.7557601372222966e-07, "loss": 1.3391, "step": 7962 }, { "epoch": 2.7140422631220176, "grad_norm": 13.728079820074564, "learning_rate": 2.749271195513387e-07, "loss": 1.0585, "step": 7963 }, { "epoch": 2.714383094751193, "grad_norm": 12.936745243100821, "learning_rate": 2.7427896865322043e-07, "loss": 1.2226, "step": 7964 }, { "epoch": 2.714723926380368, "grad_norm": 17.02869242598727, "learning_rate": 2.736315611298329e-07, "loss": 1.834, "step": 7965 }, { "epoch": 2.715064758009543, "grad_norm": 15.713471282809051, "learning_rate": 2.7298489708301513e-07, "loss": 1.3336, "step": 7966 }, { "epoch": 2.7154055896387184, "grad_norm": 23.292500614783695, "learning_rate": 2.723389766144907e-07, "loss": 0.8076, "step": 7967 }, { "epoch": 2.7157464212678937, "grad_norm": 52.772783225283014, "learning_rate": 2.716937998258645e-07, "loss": 1.1896, "step": 7968 }, { "epoch": 2.716087252897069, "grad_norm": 17.030356438238478, "learning_rate": 2.710493668186259e-07, "loss": 1.4926, "step": 7969 }, { "epoch": 2.7164280845262443, "grad_norm": 12.703133432744371, "learning_rate": 2.704056776941466e-07, "loss": 1.5019, "step": 7970 }, { "epoch": 2.716768916155419, "grad_norm": 12.15774823127215, "learning_rate": 2.697627325536828e-07, "loss": 1.5022, "step": 7971 }, { "epoch": 2.7171097477845945, "grad_norm": 16.903253828203827, "learning_rate": 2.691205314983697e-07, "loss": 1.3218, "step": 7972 }, { "epoch": 2.7174505794137698, "grad_norm": 16.43967837499582, "learning_rate": 2.684790746292298e-07, "loss": 1.7903, "step": 7973 }, { "epoch": 2.7177914110429446, "grad_norm": 17.94141438178002, "learning_rate": 2.678383620471675e-07, "loss": 1.7723, "step": 7974 }, { "epoch": 2.71813224267212, "grad_norm": 15.147567769591271, "learning_rate": 2.671983938529671e-07, "loss": 1.1167, "step": 7975 }, { "epoch": 2.7184730743012953, "grad_norm": 19.23553444642319, "learning_rate": 2.665591701473014e-07, "loss": 1.3393, "step": 7976 }, { "epoch": 2.71881390593047, "grad_norm": 14.29565455848557, "learning_rate": 2.6592069103071993e-07, "loss": 1.7271, "step": 7977 }, { "epoch": 2.7191547375596454, "grad_norm": 23.834010306685858, "learning_rate": 2.6528295660365855e-07, "loss": 1.22, "step": 7978 }, { "epoch": 2.7194955691888207, "grad_norm": 21.6623954553323, "learning_rate": 2.646459669664364e-07, "loss": 1.6547, "step": 7979 }, { "epoch": 2.719836400817996, "grad_norm": 12.221742410829695, "learning_rate": 2.6400972221925394e-07, "loss": 1.2655, "step": 7980 }, { "epoch": 2.7201772324471714, "grad_norm": 10.577518404491006, "learning_rate": 2.633742224621949e-07, "loss": 1.2598, "step": 7981 }, { "epoch": 2.720518064076346, "grad_norm": 19.048002977817436, "learning_rate": 2.627394677952266e-07, "loss": 1.8956, "step": 7982 }, { "epoch": 2.7208588957055215, "grad_norm": 18.86522345361547, "learning_rate": 2.6210545831819756e-07, "loss": 1.4989, "step": 7983 }, { "epoch": 2.721199727334697, "grad_norm": 15.591515666256416, "learning_rate": 2.614721941308401e-07, "loss": 1.531, "step": 7984 }, { "epoch": 2.7215405589638717, "grad_norm": 14.383243902356828, "learning_rate": 2.608396753327702e-07, "loss": 1.3646, "step": 7985 }, { "epoch": 2.721881390593047, "grad_norm": 14.444769449514615, "learning_rate": 2.6020790202348434e-07, "loss": 1.6323, "step": 7986 }, { "epoch": 2.7222222222222223, "grad_norm": 14.580964161532115, "learning_rate": 2.595768743023641e-07, "loss": 1.4098, "step": 7987 }, { "epoch": 2.722563053851397, "grad_norm": 15.429590082523145, "learning_rate": 2.589465922686712e-07, "loss": 1.6463, "step": 7988 }, { "epoch": 2.7229038854805725, "grad_norm": 15.750203964276686, "learning_rate": 2.5831705602155256e-07, "loss": 1.4012, "step": 7989 }, { "epoch": 2.723244717109748, "grad_norm": 21.601554094027126, "learning_rate": 2.576882656600377e-07, "loss": 1.7735, "step": 7990 }, { "epoch": 2.723585548738923, "grad_norm": 12.454560739481266, "learning_rate": 2.5706022128303474e-07, "loss": 1.4361, "step": 7991 }, { "epoch": 2.7239263803680984, "grad_norm": 18.011516499393014, "learning_rate": 2.564329229893414e-07, "loss": 1.6007, "step": 7992 }, { "epoch": 2.7242672119972733, "grad_norm": 12.108549131983066, "learning_rate": 2.558063708776315e-07, "loss": 1.2894, "step": 7993 }, { "epoch": 2.7246080436264486, "grad_norm": 15.134777705919934, "learning_rate": 2.551805650464645e-07, "loss": 1.997, "step": 7994 }, { "epoch": 2.724948875255624, "grad_norm": 26.99305205880644, "learning_rate": 2.545555055942839e-07, "loss": 1.6634, "step": 7995 }, { "epoch": 2.7252897068847988, "grad_norm": 11.946238786918919, "learning_rate": 2.5393119261941215e-07, "loss": 1.5398, "step": 7996 }, { "epoch": 2.725630538513974, "grad_norm": 15.423249571773761, "learning_rate": 2.533076262200568e-07, "loss": 1.1774, "step": 7997 }, { "epoch": 2.7259713701431494, "grad_norm": 23.729368309896937, "learning_rate": 2.5268480649430717e-07, "loss": 1.2914, "step": 7998 }, { "epoch": 2.7263122017723243, "grad_norm": 16.98704957041396, "learning_rate": 2.5206273354013545e-07, "loss": 1.8676, "step": 7999 }, { "epoch": 2.7266530334014996, "grad_norm": 16.927803959374334, "learning_rate": 2.5144140745539615e-07, "loss": 1.701, "step": 8000 }, { "epoch": 2.726993865030675, "grad_norm": 14.542656185517494, "learning_rate": 2.508208283378266e-07, "loss": 0.941, "step": 8001 }, { "epoch": 2.72733469665985, "grad_norm": 14.926994949286993, "learning_rate": 2.5020099628504603e-07, "loss": 1.2465, "step": 8002 }, { "epoch": 2.7276755282890255, "grad_norm": 20.274184177559007, "learning_rate": 2.4958191139455635e-07, "loss": 1.9504, "step": 8003 }, { "epoch": 2.7280163599182004, "grad_norm": 18.976217290048623, "learning_rate": 2.4896357376374247e-07, "loss": 1.1565, "step": 8004 }, { "epoch": 2.7283571915473757, "grad_norm": 18.09198532526831, "learning_rate": 2.4834598348987215e-07, "loss": 1.4387, "step": 8005 }, { "epoch": 2.728698023176551, "grad_norm": 19.989631264383593, "learning_rate": 2.477291406700927e-07, "loss": 1.7646, "step": 8006 }, { "epoch": 2.729038854805726, "grad_norm": 14.428165954028136, "learning_rate": 2.47113045401437e-07, "loss": 1.1664, "step": 8007 }, { "epoch": 2.729379686434901, "grad_norm": 13.06914767547314, "learning_rate": 2.464976977808209e-07, "loss": 1.5235, "step": 8008 }, { "epoch": 2.7297205180640765, "grad_norm": 16.385229766847026, "learning_rate": 2.4588309790503807e-07, "loss": 1.6467, "step": 8009 }, { "epoch": 2.7300613496932513, "grad_norm": 17.252306136240176, "learning_rate": 2.4526924587076897e-07, "loss": 1.7769, "step": 8010 }, { "epoch": 2.7304021813224266, "grad_norm": 27.53897805848033, "learning_rate": 2.446561417745763e-07, "loss": 1.0877, "step": 8011 }, { "epoch": 2.730743012951602, "grad_norm": 17.979534407460974, "learning_rate": 2.4404378571290134e-07, "loss": 1.1576, "step": 8012 }, { "epoch": 2.7310838445807772, "grad_norm": 18.01615740087379, "learning_rate": 2.434321777820714e-07, "loss": 1.5651, "step": 8013 }, { "epoch": 2.7314246762099526, "grad_norm": 17.268431744829886, "learning_rate": 2.4282131807829514e-07, "loss": 1.6115, "step": 8014 }, { "epoch": 2.7317655078391274, "grad_norm": 33.01078195286599, "learning_rate": 2.422112066976623e-07, "loss": 1.4326, "step": 8015 }, { "epoch": 2.7321063394683027, "grad_norm": 21.726212750313106, "learning_rate": 2.416018437361461e-07, "loss": 1.669, "step": 8016 }, { "epoch": 2.732447171097478, "grad_norm": 15.88764291068917, "learning_rate": 2.409932292896028e-07, "loss": 1.5688, "step": 8017 }, { "epoch": 2.732788002726653, "grad_norm": 12.60672310828375, "learning_rate": 2.4038536345376837e-07, "loss": 0.922, "step": 8018 }, { "epoch": 2.733128834355828, "grad_norm": 15.17108067484828, "learning_rate": 2.3977824632426315e-07, "loss": 1.8778, "step": 8019 }, { "epoch": 2.7334696659850035, "grad_norm": 14.832164972405886, "learning_rate": 2.391718779965896e-07, "loss": 1.4675, "step": 8020 }, { "epoch": 2.7338104976141784, "grad_norm": 18.067165350662524, "learning_rate": 2.385662585661319e-07, "loss": 0.8629, "step": 8021 }, { "epoch": 2.7341513292433537, "grad_norm": 17.49616322115636, "learning_rate": 2.379613881281545e-07, "loss": 1.3502, "step": 8022 }, { "epoch": 2.734492160872529, "grad_norm": 18.568013787983052, "learning_rate": 2.373572667778079e-07, "loss": 1.485, "step": 8023 }, { "epoch": 2.7348329925017043, "grad_norm": 16.08490939312666, "learning_rate": 2.3675389461012277e-07, "loss": 0.8774, "step": 8024 }, { "epoch": 2.7351738241308796, "grad_norm": 16.184170556922993, "learning_rate": 2.361512717200104e-07, "loss": 1.0303, "step": 8025 }, { "epoch": 2.7355146557600545, "grad_norm": 12.298412031776735, "learning_rate": 2.355493982022683e-07, "loss": 1.498, "step": 8026 }, { "epoch": 2.73585548738923, "grad_norm": 10.659676435373422, "learning_rate": 2.3494827415157073e-07, "loss": 1.3037, "step": 8027 }, { "epoch": 2.736196319018405, "grad_norm": 25.500503130277124, "learning_rate": 2.3434789966247819e-07, "loss": 1.6319, "step": 8028 }, { "epoch": 2.73653715064758, "grad_norm": 15.841047405476026, "learning_rate": 2.3374827482943284e-07, "loss": 1.8296, "step": 8029 }, { "epoch": 2.7368779822767553, "grad_norm": 11.466089992123193, "learning_rate": 2.3314939974675654e-07, "loss": 0.7136, "step": 8030 }, { "epoch": 2.7372188139059306, "grad_norm": 19.605687241656792, "learning_rate": 2.3255127450865556e-07, "loss": 2.2206, "step": 8031 }, { "epoch": 2.7375596455351054, "grad_norm": 20.037781253174384, "learning_rate": 2.319538992092174e-07, "loss": 1.5214, "step": 8032 }, { "epoch": 2.7379004771642808, "grad_norm": 12.185185582976224, "learning_rate": 2.3135727394241135e-07, "loss": 0.933, "step": 8033 }, { "epoch": 2.738241308793456, "grad_norm": 15.932446870108082, "learning_rate": 2.3076139880208848e-07, "loss": 1.413, "step": 8034 }, { "epoch": 2.7385821404226314, "grad_norm": 11.456850308827228, "learning_rate": 2.301662738819832e-07, "loss": 0.9231, "step": 8035 }, { "epoch": 2.7389229720518067, "grad_norm": 14.221084584620275, "learning_rate": 2.2957189927571067e-07, "loss": 1.3425, "step": 8036 }, { "epoch": 2.7392638036809815, "grad_norm": 10.547114177096399, "learning_rate": 2.2897827507676828e-07, "loss": 1.0822, "step": 8037 }, { "epoch": 2.739604635310157, "grad_norm": 35.503599147557075, "learning_rate": 2.283854013785358e-07, "loss": 1.684, "step": 8038 }, { "epoch": 2.739945466939332, "grad_norm": 12.314456891851659, "learning_rate": 2.2779327827427423e-07, "loss": 1.4911, "step": 8039 }, { "epoch": 2.740286298568507, "grad_norm": 19.215651314397363, "learning_rate": 2.2720190585712675e-07, "loss": 1.7581, "step": 8040 }, { "epoch": 2.7406271301976823, "grad_norm": 13.762081904524175, "learning_rate": 2.2661128422011902e-07, "loss": 1.5541, "step": 8041 }, { "epoch": 2.7409679618268576, "grad_norm": 23.544006338046398, "learning_rate": 2.260214134561589e-07, "loss": 1.5338, "step": 8042 }, { "epoch": 2.7413087934560325, "grad_norm": 13.891976178921853, "learning_rate": 2.2543229365803388e-07, "loss": 1.2814, "step": 8043 }, { "epoch": 2.741649625085208, "grad_norm": 15.630492025749321, "learning_rate": 2.2484392491841478e-07, "loss": 1.5571, "step": 8044 }, { "epoch": 2.741990456714383, "grad_norm": 17.747622878697886, "learning_rate": 2.242563073298565e-07, "loss": 1.6969, "step": 8045 }, { "epoch": 2.7423312883435584, "grad_norm": 12.676554466246637, "learning_rate": 2.236694409847917e-07, "loss": 1.2652, "step": 8046 }, { "epoch": 2.7426721199727337, "grad_norm": 15.438378465563972, "learning_rate": 2.230833259755366e-07, "loss": 1.2396, "step": 8047 }, { "epoch": 2.7430129516019086, "grad_norm": 12.05975451604001, "learning_rate": 2.2249796239429077e-07, "loss": 1.755, "step": 8048 }, { "epoch": 2.743353783231084, "grad_norm": 16.654247204596377, "learning_rate": 2.2191335033313333e-07, "loss": 0.792, "step": 8049 }, { "epoch": 2.7436946148602592, "grad_norm": 18.227265482141664, "learning_rate": 2.213294898840268e-07, "loss": 1.0457, "step": 8050 }, { "epoch": 2.744035446489434, "grad_norm": 13.590446656273961, "learning_rate": 2.2074638113881387e-07, "loss": 1.1177, "step": 8051 }, { "epoch": 2.7443762781186094, "grad_norm": 11.798789466556903, "learning_rate": 2.2016402418922e-07, "loss": 0.6563, "step": 8052 }, { "epoch": 2.7447171097477847, "grad_norm": 21.662529484682764, "learning_rate": 2.1958241912685308e-07, "loss": 1.272, "step": 8053 }, { "epoch": 2.7450579413769596, "grad_norm": 17.469318493486988, "learning_rate": 2.1900156604320154e-07, "loss": 1.3762, "step": 8054 }, { "epoch": 2.745398773006135, "grad_norm": 15.153564160173236, "learning_rate": 2.1842146502963568e-07, "loss": 0.5443, "step": 8055 }, { "epoch": 2.74573960463531, "grad_norm": 17.427679750970054, "learning_rate": 2.1784211617740692e-07, "loss": 1.685, "step": 8056 }, { "epoch": 2.7460804362644855, "grad_norm": 12.666362059019631, "learning_rate": 2.172635195776507e-07, "loss": 1.245, "step": 8057 }, { "epoch": 2.7464212678936604, "grad_norm": 11.384723431949269, "learning_rate": 2.1668567532138195e-07, "loss": 1.179, "step": 8058 }, { "epoch": 2.7467620995228357, "grad_norm": 13.837598869528144, "learning_rate": 2.1610858349949693e-07, "loss": 1.7304, "step": 8059 }, { "epoch": 2.747102931152011, "grad_norm": 40.22327788582566, "learning_rate": 2.1553224420277629e-07, "loss": 1.6781, "step": 8060 }, { "epoch": 2.7474437627811863, "grad_norm": 13.87101584810105, "learning_rate": 2.1495665752187866e-07, "loss": 1.1848, "step": 8061 }, { "epoch": 2.747784594410361, "grad_norm": 15.465899971504937, "learning_rate": 2.143818235473466e-07, "loss": 1.1726, "step": 8062 }, { "epoch": 2.7481254260395365, "grad_norm": 12.883909498844002, "learning_rate": 2.1380774236960555e-07, "loss": 0.8505, "step": 8063 }, { "epoch": 2.7484662576687118, "grad_norm": 25.576814505003547, "learning_rate": 2.1323441407895883e-07, "loss": 1.0972, "step": 8064 }, { "epoch": 2.7488070892978866, "grad_norm": 11.323084713368194, "learning_rate": 2.1266183876559377e-07, "loss": 1.3365, "step": 8065 }, { "epoch": 2.749147920927062, "grad_norm": 13.121440056508133, "learning_rate": 2.120900165195783e-07, "loss": 1.2805, "step": 8066 }, { "epoch": 2.7494887525562373, "grad_norm": 13.27551493476925, "learning_rate": 2.115189474308632e-07, "loss": 1.0859, "step": 8067 }, { "epoch": 2.7498295841854126, "grad_norm": 15.871500629656257, "learning_rate": 2.1094863158927947e-07, "loss": 1.148, "step": 8068 }, { "epoch": 2.7501704158145874, "grad_norm": 14.570448611242304, "learning_rate": 2.1037906908453976e-07, "loss": 1.4385, "step": 8069 }, { "epoch": 2.7505112474437627, "grad_norm": 11.878778618718865, "learning_rate": 2.0981026000623905e-07, "loss": 0.9275, "step": 8070 }, { "epoch": 2.750852079072938, "grad_norm": 13.572795450176923, "learning_rate": 2.09242204443853e-07, "loss": 1.1234, "step": 8071 }, { "epoch": 2.7511929107021134, "grad_norm": 17.78385295705943, "learning_rate": 2.0867490248673962e-07, "loss": 1.6557, "step": 8072 }, { "epoch": 2.7515337423312882, "grad_norm": 15.342226935848622, "learning_rate": 2.0810835422413634e-07, "loss": 1.9695, "step": 8073 }, { "epoch": 2.7518745739604635, "grad_norm": 16.120582437686735, "learning_rate": 2.0754255974516578e-07, "loss": 1.4478, "step": 8074 }, { "epoch": 2.752215405589639, "grad_norm": 20.0350989903574, "learning_rate": 2.069775191388268e-07, "loss": 1.0904, "step": 8075 }, { "epoch": 2.7525562372188137, "grad_norm": 14.254041110146149, "learning_rate": 2.0641323249400492e-07, "loss": 1.2898, "step": 8076 }, { "epoch": 2.752897068847989, "grad_norm": 15.085266736779047, "learning_rate": 2.0584969989946357e-07, "loss": 1.9499, "step": 8077 }, { "epoch": 2.7532379004771643, "grad_norm": 19.4120950492033, "learning_rate": 2.05286921443848e-07, "loss": 1.5242, "step": 8078 }, { "epoch": 2.7535787321063396, "grad_norm": 13.963152037505315, "learning_rate": 2.0472489721568733e-07, "loss": 1.4645, "step": 8079 }, { "epoch": 2.7539195637355145, "grad_norm": 13.966458299296342, "learning_rate": 2.0416362730338867e-07, "loss": 1.5248, "step": 8080 }, { "epoch": 2.75426039536469, "grad_norm": 14.45055689143638, "learning_rate": 2.0360311179524306e-07, "loss": 1.8254, "step": 8081 }, { "epoch": 2.754601226993865, "grad_norm": 13.967849206101805, "learning_rate": 2.030433507794205e-07, "loss": 1.611, "step": 8082 }, { "epoch": 2.7549420586230404, "grad_norm": 14.04047260996693, "learning_rate": 2.02484344343975e-07, "loss": 1.7143, "step": 8083 }, { "epoch": 2.7552828902522153, "grad_norm": 16.53607040341396, "learning_rate": 2.019260925768396e-07, "loss": 1.1461, "step": 8084 }, { "epoch": 2.7556237218813906, "grad_norm": 12.538006422886921, "learning_rate": 2.013685955658301e-07, "loss": 0.8306, "step": 8085 }, { "epoch": 2.755964553510566, "grad_norm": 20.61115446825965, "learning_rate": 2.0081185339864307e-07, "loss": 1.2447, "step": 8086 }, { "epoch": 2.7563053851397408, "grad_norm": 16.016191724387728, "learning_rate": 2.0025586616285564e-07, "loss": 1.6995, "step": 8087 }, { "epoch": 2.756646216768916, "grad_norm": 14.679644649384212, "learning_rate": 1.9970063394592731e-07, "loss": 1.7446, "step": 8088 }, { "epoch": 2.7569870483980914, "grad_norm": 16.1910583453371, "learning_rate": 1.9914615683519821e-07, "loss": 1.8297, "step": 8089 }, { "epoch": 2.7573278800272663, "grad_norm": 12.82728727464394, "learning_rate": 1.9859243491788915e-07, "loss": 1.2544, "step": 8090 }, { "epoch": 2.7576687116564416, "grad_norm": 15.274887421047548, "learning_rate": 1.9803946828110376e-07, "loss": 1.6071, "step": 8091 }, { "epoch": 2.758009543285617, "grad_norm": 19.60198842198723, "learning_rate": 1.974872570118258e-07, "loss": 1.8844, "step": 8092 }, { "epoch": 2.758350374914792, "grad_norm": 11.205531341747417, "learning_rate": 1.9693580119691912e-07, "loss": 1.5732, "step": 8093 }, { "epoch": 2.7586912065439675, "grad_norm": 16.345092787496416, "learning_rate": 1.9638510092313212e-07, "loss": 1.2704, "step": 8094 }, { "epoch": 2.7590320381731424, "grad_norm": 17.53788784878188, "learning_rate": 1.9583515627708993e-07, "loss": 1.4721, "step": 8095 }, { "epoch": 2.7593728698023177, "grad_norm": 13.00074760463257, "learning_rate": 1.952859673453017e-07, "loss": 1.4418, "step": 8096 }, { "epoch": 2.759713701431493, "grad_norm": 20.376683696667325, "learning_rate": 1.9473753421415831e-07, "loss": 1.4673, "step": 8097 }, { "epoch": 2.760054533060668, "grad_norm": 13.146924266413274, "learning_rate": 1.9418985696992853e-07, "loss": 1.5924, "step": 8098 }, { "epoch": 2.760395364689843, "grad_norm": 15.543786153447835, "learning_rate": 1.9364293569876514e-07, "loss": 1.4013, "step": 8099 }, { "epoch": 2.7607361963190185, "grad_norm": 16.976442657760913, "learning_rate": 1.930967704867015e-07, "loss": 1.2542, "step": 8100 }, { "epoch": 2.7610770279481933, "grad_norm": 15.289762489913526, "learning_rate": 1.925513614196506e-07, "loss": 1.9644, "step": 8101 }, { "epoch": 2.7614178595773686, "grad_norm": 18.86526358065276, "learning_rate": 1.920067085834082e-07, "loss": 2.0832, "step": 8102 }, { "epoch": 2.761758691206544, "grad_norm": 14.714201981417519, "learning_rate": 1.914628120636497e-07, "loss": 1.4764, "step": 8103 }, { "epoch": 2.7620995228357192, "grad_norm": 13.483246743486895, "learning_rate": 1.9091967194593274e-07, "loss": 1.2286, "step": 8104 }, { "epoch": 2.7624403544648946, "grad_norm": 12.065392854583529, "learning_rate": 1.9037728831569513e-07, "loss": 1.6434, "step": 8105 }, { "epoch": 2.7627811860940694, "grad_norm": 11.983975826867335, "learning_rate": 1.8983566125825581e-07, "loss": 1.1018, "step": 8106 }, { "epoch": 2.7631220177232447, "grad_norm": 13.524987476487178, "learning_rate": 1.8929479085881553e-07, "loss": 1.7251, "step": 8107 }, { "epoch": 2.76346284935242, "grad_norm": 14.25839180750318, "learning_rate": 1.887546772024551e-07, "loss": 1.5016, "step": 8108 }, { "epoch": 2.763803680981595, "grad_norm": 11.891673990896397, "learning_rate": 1.8821532037413603e-07, "loss": 1.2055, "step": 8109 }, { "epoch": 2.76414451261077, "grad_norm": 15.560069261157187, "learning_rate": 1.876767204587021e-07, "loss": 1.699, "step": 8110 }, { "epoch": 2.7644853442399455, "grad_norm": 16.48575922073995, "learning_rate": 1.8713887754087667e-07, "loss": 2.1257, "step": 8111 }, { "epoch": 2.7648261758691204, "grad_norm": 11.446483806331441, "learning_rate": 1.8660179170526372e-07, "loss": 1.304, "step": 8112 }, { "epoch": 2.7651670074982957, "grad_norm": 19.71555273362329, "learning_rate": 1.860654630363512e-07, "loss": 1.3135, "step": 8113 }, { "epoch": 2.765507839127471, "grad_norm": 17.795042535201315, "learning_rate": 1.8552989161850443e-07, "loss": 2.092, "step": 8114 }, { "epoch": 2.7658486707566463, "grad_norm": 18.08334423586602, "learning_rate": 1.8499507753597046e-07, "loss": 1.5266, "step": 8115 }, { "epoch": 2.7661895023858216, "grad_norm": 12.689453039573868, "learning_rate": 1.8446102087287864e-07, "loss": 1.371, "step": 8116 }, { "epoch": 2.7665303340149965, "grad_norm": 18.004566040562793, "learning_rate": 1.8392772171323737e-07, "loss": 1.258, "step": 8117 }, { "epoch": 2.766871165644172, "grad_norm": 12.87824797703949, "learning_rate": 1.8339518014093726e-07, "loss": 1.2489, "step": 8118 }, { "epoch": 2.767211997273347, "grad_norm": 13.442599068079128, "learning_rate": 1.8286339623974912e-07, "loss": 1.2635, "step": 8119 }, { "epoch": 2.767552828902522, "grad_norm": 15.220720239286619, "learning_rate": 1.8233237009332494e-07, "loss": 1.4624, "step": 8120 }, { "epoch": 2.7678936605316973, "grad_norm": 21.813804977064922, "learning_rate": 1.8180210178519676e-07, "loss": 0.94, "step": 8121 }, { "epoch": 2.7682344921608726, "grad_norm": 16.186803555152185, "learning_rate": 1.8127259139877785e-07, "loss": 1.5121, "step": 8122 }, { "epoch": 2.7685753237900474, "grad_norm": 18.893541979082446, "learning_rate": 1.8074383901736324e-07, "loss": 1.509, "step": 8123 }, { "epoch": 2.7689161554192228, "grad_norm": 18.509877734930946, "learning_rate": 1.802158447241259e-07, "loss": 1.1326, "step": 8124 }, { "epoch": 2.769256987048398, "grad_norm": 13.588683146774786, "learning_rate": 1.7968860860212377e-07, "loss": 0.8586, "step": 8125 }, { "epoch": 2.7695978186775734, "grad_norm": 11.538901970070764, "learning_rate": 1.7916213073429222e-07, "loss": 1.387, "step": 8126 }, { "epoch": 2.7699386503067487, "grad_norm": 14.089555591440478, "learning_rate": 1.7863641120344722e-07, "loss": 1.2256, "step": 8127 }, { "epoch": 2.7702794819359235, "grad_norm": 12.401098489948426, "learning_rate": 1.781114500922876e-07, "loss": 1.3816, "step": 8128 }, { "epoch": 2.770620313565099, "grad_norm": 16.178396481575074, "learning_rate": 1.775872474833923e-07, "loss": 0.7967, "step": 8129 }, { "epoch": 2.770961145194274, "grad_norm": 13.748991089538498, "learning_rate": 1.7706380345921926e-07, "loss": 1.1356, "step": 8130 }, { "epoch": 2.771301976823449, "grad_norm": 14.58879699690518, "learning_rate": 1.7654111810210982e-07, "loss": 1.057, "step": 8131 }, { "epoch": 2.7716428084526243, "grad_norm": 28.300996730820135, "learning_rate": 1.760191914942827e-07, "loss": 0.9746, "step": 8132 }, { "epoch": 2.7719836400817996, "grad_norm": 16.37358211517568, "learning_rate": 1.7549802371783997e-07, "loss": 1.5109, "step": 8133 }, { "epoch": 2.7723244717109745, "grad_norm": 20.848378455917345, "learning_rate": 1.7497761485476384e-07, "loss": 1.7734, "step": 8134 }, { "epoch": 2.77266530334015, "grad_norm": 12.692546134076267, "learning_rate": 1.7445796498691603e-07, "loss": 1.3555, "step": 8135 }, { "epoch": 2.773006134969325, "grad_norm": 17.833074802218132, "learning_rate": 1.739390741960395e-07, "loss": 1.3709, "step": 8136 }, { "epoch": 2.7733469665985004, "grad_norm": 15.437789537323338, "learning_rate": 1.734209425637584e-07, "loss": 1.4941, "step": 8137 }, { "epoch": 2.7736877982276757, "grad_norm": 12.13539086192995, "learning_rate": 1.7290357017157645e-07, "loss": 1.4273, "step": 8138 }, { "epoch": 2.7740286298568506, "grad_norm": 24.009442513637342, "learning_rate": 1.723869571008785e-07, "loss": 2.1476, "step": 8139 }, { "epoch": 2.774369461486026, "grad_norm": 25.82321412563603, "learning_rate": 1.7187110343292902e-07, "loss": 1.734, "step": 8140 }, { "epoch": 2.7747102931152012, "grad_norm": 16.25665191623066, "learning_rate": 1.713560092488753e-07, "loss": 1.4015, "step": 8141 }, { "epoch": 2.775051124744376, "grad_norm": 11.366317794320885, "learning_rate": 1.7084167462974366e-07, "loss": 0.7363, "step": 8142 }, { "epoch": 2.7753919563735514, "grad_norm": 14.916498077134852, "learning_rate": 1.703280996564388e-07, "loss": 1.0085, "step": 8143 }, { "epoch": 2.7757327880027267, "grad_norm": 19.020505154638137, "learning_rate": 1.6981528440975114e-07, "loss": 0.9038, "step": 8144 }, { "epoch": 2.7760736196319016, "grad_norm": 17.413386921467023, "learning_rate": 1.6930322897034667e-07, "loss": 1.4372, "step": 8145 }, { "epoch": 2.776414451261077, "grad_norm": 22.745872983405768, "learning_rate": 1.6879193341877377e-07, "loss": 1.2646, "step": 8146 }, { "epoch": 2.776755282890252, "grad_norm": 10.82796902878109, "learning_rate": 1.6828139783546248e-07, "loss": 1.4311, "step": 8147 }, { "epoch": 2.7770961145194275, "grad_norm": 17.098897054899176, "learning_rate": 1.6777162230072031e-07, "loss": 1.8226, "step": 8148 }, { "epoch": 2.777436946148603, "grad_norm": 19.30539874085868, "learning_rate": 1.6726260689473862e-07, "loss": 1.939, "step": 8149 }, { "epoch": 2.7777777777777777, "grad_norm": 12.05028778009361, "learning_rate": 1.667543516975867e-07, "loss": 1.5529, "step": 8150 }, { "epoch": 2.778118609406953, "grad_norm": 13.455676138414612, "learning_rate": 1.6624685678921503e-07, "loss": 1.441, "step": 8151 }, { "epoch": 2.7784594410361283, "grad_norm": 12.2758491450673, "learning_rate": 1.657401222494548e-07, "loss": 1.6013, "step": 8152 }, { "epoch": 2.778800272665303, "grad_norm": 13.728680305466733, "learning_rate": 1.6523414815801775e-07, "loss": 1.0432, "step": 8153 }, { "epoch": 2.7791411042944785, "grad_norm": 18.089662123876025, "learning_rate": 1.6472893459449523e-07, "loss": 1.367, "step": 8154 }, { "epoch": 2.779481935923654, "grad_norm": 13.199877463691424, "learning_rate": 1.6422448163835923e-07, "loss": 1.1024, "step": 8155 }, { "epoch": 2.7798227675528286, "grad_norm": 9.673611797313347, "learning_rate": 1.637207893689624e-07, "loss": 0.8688, "step": 8156 }, { "epoch": 2.780163599182004, "grad_norm": 21.88520773195195, "learning_rate": 1.6321785786553745e-07, "loss": 1.8145, "step": 8157 }, { "epoch": 2.7805044308111793, "grad_norm": 25.78202568123999, "learning_rate": 1.6271568720719777e-07, "loss": 0.9547, "step": 8158 }, { "epoch": 2.7808452624403546, "grad_norm": 14.709572367990148, "learning_rate": 1.6221427747293682e-07, "loss": 1.3391, "step": 8159 }, { "epoch": 2.78118609406953, "grad_norm": 17.31092689615749, "learning_rate": 1.617136287416282e-07, "loss": 1.4305, "step": 8160 }, { "epoch": 2.7815269256987047, "grad_norm": 16.208075083189453, "learning_rate": 1.6121374109202504e-07, "loss": 1.6146, "step": 8161 }, { "epoch": 2.78186775732788, "grad_norm": 23.874164187034555, "learning_rate": 1.6071461460276328e-07, "loss": 1.119, "step": 8162 }, { "epoch": 2.7822085889570554, "grad_norm": 12.240660214643773, "learning_rate": 1.602162493523568e-07, "loss": 0.973, "step": 8163 }, { "epoch": 2.7825494205862302, "grad_norm": 15.174533655837587, "learning_rate": 1.5971864541919958e-07, "loss": 1.7261, "step": 8164 }, { "epoch": 2.7828902522154055, "grad_norm": 15.009358694431825, "learning_rate": 1.592218028815684e-07, "loss": 1.6329, "step": 8165 }, { "epoch": 2.783231083844581, "grad_norm": 15.353075975576331, "learning_rate": 1.5872572181761693e-07, "loss": 1.9149, "step": 8166 }, { "epoch": 2.7835719154737557, "grad_norm": 24.349274586489422, "learning_rate": 1.58230402305381e-07, "loss": 1.4818, "step": 8167 }, { "epoch": 2.783912747102931, "grad_norm": 33.61540114448144, "learning_rate": 1.5773584442277833e-07, "loss": 1.6206, "step": 8168 }, { "epoch": 2.7842535787321063, "grad_norm": 10.956268534053383, "learning_rate": 1.5724204824760225e-07, "loss": 1.1908, "step": 8169 }, { "epoch": 2.7845944103612816, "grad_norm": 15.920096004875978, "learning_rate": 1.5674901385753006e-07, "loss": 1.1008, "step": 8170 }, { "epoch": 2.784935241990457, "grad_norm": 13.306461693949208, "learning_rate": 1.5625674133011747e-07, "loss": 1.5338, "step": 8171 }, { "epoch": 2.785276073619632, "grad_norm": 16.777547186671928, "learning_rate": 1.5576523074280147e-07, "loss": 1.888, "step": 8172 }, { "epoch": 2.785616905248807, "grad_norm": 18.12291279081509, "learning_rate": 1.5527448217289964e-07, "loss": 1.4311, "step": 8173 }, { "epoch": 2.7859577368779824, "grad_norm": 16.04256474860708, "learning_rate": 1.547844956976058e-07, "loss": 1.1867, "step": 8174 }, { "epoch": 2.7862985685071573, "grad_norm": 16.34093620529231, "learning_rate": 1.542952713939988e-07, "loss": 1.1314, "step": 8175 }, { "epoch": 2.7866394001363326, "grad_norm": 15.309479909604221, "learning_rate": 1.5380680933903657e-07, "loss": 1.338, "step": 8176 }, { "epoch": 2.786980231765508, "grad_norm": 12.969034876541869, "learning_rate": 1.533191096095532e-07, "loss": 1.707, "step": 8177 }, { "epoch": 2.7873210633946828, "grad_norm": 15.680194945967575, "learning_rate": 1.5283217228226843e-07, "loss": 1.6934, "step": 8178 }, { "epoch": 2.787661895023858, "grad_norm": 13.769872811503648, "learning_rate": 1.523459974337782e-07, "loss": 1.6648, "step": 8179 }, { "epoch": 2.7880027266530334, "grad_norm": 20.594639264707407, "learning_rate": 1.5186058514055912e-07, "loss": 1.832, "step": 8180 }, { "epoch": 2.7883435582822087, "grad_norm": 14.66963157369847, "learning_rate": 1.5137593547897013e-07, "loss": 1.1305, "step": 8181 }, { "epoch": 2.788684389911384, "grad_norm": 17.34911404610959, "learning_rate": 1.5089204852524798e-07, "loss": 1.3787, "step": 8182 }, { "epoch": 2.789025221540559, "grad_norm": 14.648814750549954, "learning_rate": 1.5040892435550901e-07, "loss": 1.4079, "step": 8183 }, { "epoch": 2.789366053169734, "grad_norm": 17.741851781690094, "learning_rate": 1.4992656304575192e-07, "loss": 1.4321, "step": 8184 }, { "epoch": 2.7897068847989095, "grad_norm": 20.919610011527535, "learning_rate": 1.4944496467185322e-07, "loss": 1.2326, "step": 8185 }, { "epoch": 2.7900477164280844, "grad_norm": 12.059932865922773, "learning_rate": 1.489641293095706e-07, "loss": 1.2192, "step": 8186 }, { "epoch": 2.7903885480572597, "grad_norm": 20.742222940802492, "learning_rate": 1.4848405703454138e-07, "loss": 1.464, "step": 8187 }, { "epoch": 2.790729379686435, "grad_norm": 16.431535105848507, "learning_rate": 1.4800474792228292e-07, "loss": 1.3139, "step": 8188 }, { "epoch": 2.79107021131561, "grad_norm": 21.62666517846386, "learning_rate": 1.4752620204819158e-07, "loss": 0.9921, "step": 8189 }, { "epoch": 2.791411042944785, "grad_norm": 15.99158995092772, "learning_rate": 1.47048419487546e-07, "loss": 1.4136, "step": 8190 }, { "epoch": 2.7917518745739605, "grad_norm": 13.556282574373911, "learning_rate": 1.4657140031550221e-07, "loss": 1.7717, "step": 8191 }, { "epoch": 2.7920927062031358, "grad_norm": 16.641194270395196, "learning_rate": 1.460951446070974e-07, "loss": 1.6968, "step": 8192 }, { "epoch": 2.792433537832311, "grad_norm": 10.885461758708278, "learning_rate": 1.4561965243724885e-07, "loss": 1.3212, "step": 8193 }, { "epoch": 2.792774369461486, "grad_norm": 18.182332932971832, "learning_rate": 1.4514492388075341e-07, "loss": 1.3676, "step": 8194 }, { "epoch": 2.7931152010906612, "grad_norm": 21.334825579813312, "learning_rate": 1.446709590122869e-07, "loss": 1.3119, "step": 8195 }, { "epoch": 2.7934560327198366, "grad_norm": 14.445806806986903, "learning_rate": 1.4419775790640633e-07, "loss": 1.1627, "step": 8196 }, { "epoch": 2.7937968643490114, "grad_norm": 17.26907347538002, "learning_rate": 1.437253206375494e-07, "loss": 1.672, "step": 8197 }, { "epoch": 2.7941376959781867, "grad_norm": 15.507726821319102, "learning_rate": 1.4325364728003e-07, "loss": 1.7855, "step": 8198 }, { "epoch": 2.794478527607362, "grad_norm": 13.866977204571521, "learning_rate": 1.4278273790804654e-07, "loss": 1.3328, "step": 8199 }, { "epoch": 2.794819359236537, "grad_norm": 6.862250622485755, "learning_rate": 1.423125925956731e-07, "loss": 0.6129, "step": 8200 }, { "epoch": 2.795160190865712, "grad_norm": 25.287235182396113, "learning_rate": 1.4184321141686608e-07, "loss": 1.834, "step": 8201 }, { "epoch": 2.7955010224948875, "grad_norm": 14.43193680465507, "learning_rate": 1.4137459444546198e-07, "loss": 1.7952, "step": 8202 }, { "epoch": 2.795841854124063, "grad_norm": 23.653452405922764, "learning_rate": 1.409067417551746e-07, "loss": 1.6094, "step": 8203 }, { "epoch": 2.796182685753238, "grad_norm": 23.04660792240722, "learning_rate": 1.4043965341959952e-07, "loss": 2.0725, "step": 8204 }, { "epoch": 2.796523517382413, "grad_norm": 12.773680533695984, "learning_rate": 1.3997332951221242e-07, "loss": 1.275, "step": 8205 }, { "epoch": 2.7968643490115883, "grad_norm": 11.665971548464492, "learning_rate": 1.395077701063663e-07, "loss": 1.4585, "step": 8206 }, { "epoch": 2.7972051806407636, "grad_norm": 16.451612390528506, "learning_rate": 1.3904297527529754e-07, "loss": 1.7009, "step": 8207 }, { "epoch": 2.7975460122699385, "grad_norm": 16.905497351962605, "learning_rate": 1.3857894509211822e-07, "loss": 1.077, "step": 8208 }, { "epoch": 2.797886843899114, "grad_norm": 16.804404952482624, "learning_rate": 1.381156796298233e-07, "loss": 1.4976, "step": 8209 }, { "epoch": 2.798227675528289, "grad_norm": 18.23956799680442, "learning_rate": 1.376531789612867e-07, "loss": 2.0403, "step": 8210 }, { "epoch": 2.798568507157464, "grad_norm": 15.005499711052913, "learning_rate": 1.3719144315925968e-07, "loss": 1.5192, "step": 8211 }, { "epoch": 2.7989093387866393, "grad_norm": 22.576359684317264, "learning_rate": 1.367304722963775e-07, "loss": 1.2735, "step": 8212 }, { "epoch": 2.7992501704158146, "grad_norm": 15.344371330910842, "learning_rate": 1.3627026644515097e-07, "loss": 0.9488, "step": 8213 }, { "epoch": 2.79959100204499, "grad_norm": 15.345259889830702, "learning_rate": 1.358108256779722e-07, "loss": 1.1782, "step": 8214 }, { "epoch": 2.799931833674165, "grad_norm": 13.68619779074325, "learning_rate": 1.353521500671151e-07, "loss": 1.424, "step": 8215 }, { "epoch": 2.80027266530334, "grad_norm": 10.424063523800296, "learning_rate": 1.348942396847297e-07, "loss": 1.4605, "step": 8216 }, { "epoch": 2.8006134969325154, "grad_norm": 25.48032531318921, "learning_rate": 1.3443709460284616e-07, "loss": 1.485, "step": 8217 }, { "epoch": 2.8009543285616907, "grad_norm": 15.04601390894561, "learning_rate": 1.339807148933775e-07, "loss": 1.3406, "step": 8218 }, { "epoch": 2.8012951601908656, "grad_norm": 17.788534784044888, "learning_rate": 1.335251006281124e-07, "loss": 1.5815, "step": 8219 }, { "epoch": 2.801635991820041, "grad_norm": 17.15743956511697, "learning_rate": 1.3307025187872135e-07, "loss": 1.4299, "step": 8220 }, { "epoch": 2.801976823449216, "grad_norm": 12.860507769089491, "learning_rate": 1.3261616871675375e-07, "loss": 1.5437, "step": 8221 }, { "epoch": 2.802317655078391, "grad_norm": 12.030930558230326, "learning_rate": 1.3216285121363913e-07, "loss": 1.5003, "step": 8222 }, { "epoch": 2.8026584867075663, "grad_norm": 10.59522362544998, "learning_rate": 1.317102994406849e-07, "loss": 1.2976, "step": 8223 }, { "epoch": 2.8029993183367417, "grad_norm": 32.829852478217894, "learning_rate": 1.3125851346908082e-07, "loss": 1.7733, "step": 8224 }, { "epoch": 2.803340149965917, "grad_norm": 13.559153641883631, "learning_rate": 1.3080749336989384e-07, "loss": 1.1825, "step": 8225 }, { "epoch": 2.8036809815950923, "grad_norm": 11.467271678237381, "learning_rate": 1.3035723921407117e-07, "loss": 1.4316, "step": 8226 }, { "epoch": 2.804021813224267, "grad_norm": 24.992102370026817, "learning_rate": 1.2990775107243947e-07, "loss": 0.9822, "step": 8227 }, { "epoch": 2.8043626448534424, "grad_norm": 21.233528937745827, "learning_rate": 1.2945902901570605e-07, "loss": 1.5317, "step": 8228 }, { "epoch": 2.8047034764826178, "grad_norm": 11.410600305985529, "learning_rate": 1.290110731144545e-07, "loss": 1.3397, "step": 8229 }, { "epoch": 2.8050443081117926, "grad_norm": 13.86308173974429, "learning_rate": 1.2856388343915226e-07, "loss": 1.4555, "step": 8230 }, { "epoch": 2.805385139740968, "grad_norm": 13.477384208997021, "learning_rate": 1.281174600601437e-07, "loss": 1.7159, "step": 8231 }, { "epoch": 2.8057259713701432, "grad_norm": 14.930078589709218, "learning_rate": 1.2767180304765149e-07, "loss": 1.5707, "step": 8232 }, { "epoch": 2.806066802999318, "grad_norm": 20.43375386680405, "learning_rate": 1.272269124717812e-07, "loss": 1.0831, "step": 8233 }, { "epoch": 2.8064076346284934, "grad_norm": 13.27559774431738, "learning_rate": 1.2678278840251467e-07, "loss": 1.2217, "step": 8234 }, { "epoch": 2.8067484662576687, "grad_norm": 10.699368227882474, "learning_rate": 1.2633943090971434e-07, "loss": 1.207, "step": 8235 }, { "epoch": 2.807089297886844, "grad_norm": 16.32259052207818, "learning_rate": 1.2589684006312276e-07, "loss": 1.6315, "step": 8236 }, { "epoch": 2.8074301295160193, "grad_norm": 10.214735566559185, "learning_rate": 1.2545501593236086e-07, "loss": 1.516, "step": 8237 }, { "epoch": 2.807770961145194, "grad_norm": 11.357863383507132, "learning_rate": 1.250139585869292e-07, "loss": 1.4676, "step": 8238 }, { "epoch": 2.8081117927743695, "grad_norm": 29.7893589148878, "learning_rate": 1.2457366809620786e-07, "loss": 1.9946, "step": 8239 }, { "epoch": 2.808452624403545, "grad_norm": 12.020215000489076, "learning_rate": 1.2413414452945693e-07, "loss": 1.2356, "step": 8240 }, { "epoch": 2.8087934560327197, "grad_norm": 17.79007337655226, "learning_rate": 1.2369538795581447e-07, "loss": 1.3717, "step": 8241 }, { "epoch": 2.809134287661895, "grad_norm": 56.095565943483614, "learning_rate": 1.2325739844429919e-07, "loss": 1.8355, "step": 8242 }, { "epoch": 2.8094751192910703, "grad_norm": 19.792954981304156, "learning_rate": 1.2282017606380814e-07, "loss": 1.7132, "step": 8243 }, { "epoch": 2.809815950920245, "grad_norm": 10.84996252715398, "learning_rate": 1.2238372088311913e-07, "loss": 0.928, "step": 8244 }, { "epoch": 2.8101567825494205, "grad_norm": 14.613616729931715, "learning_rate": 1.219480329708861e-07, "loss": 1.3954, "step": 8245 }, { "epoch": 2.810497614178596, "grad_norm": 17.488664023255833, "learning_rate": 1.21513112395647e-07, "loss": 1.749, "step": 8246 }, { "epoch": 2.810838445807771, "grad_norm": 20.721633169953872, "learning_rate": 1.2107895922581547e-07, "loss": 2.0875, "step": 8247 }, { "epoch": 2.8111792774369464, "grad_norm": 9.94286431974173, "learning_rate": 1.2064557352968408e-07, "loss": 1.2359, "step": 8248 }, { "epoch": 2.8115201090661213, "grad_norm": 30.48215864730469, "learning_rate": 1.2021295537542942e-07, "loss": 1.7001, "step": 8249 }, { "epoch": 2.8118609406952966, "grad_norm": 20.030829789932678, "learning_rate": 1.1978110483110096e-07, "loss": 1.8595, "step": 8250 }, { "epoch": 2.812201772324472, "grad_norm": 17.604134090878006, "learning_rate": 1.1935002196463208e-07, "loss": 1.2887, "step": 8251 }, { "epoch": 2.8125426039536467, "grad_norm": 16.10155947563347, "learning_rate": 1.1891970684383359e-07, "loss": 1.9301, "step": 8252 }, { "epoch": 2.812883435582822, "grad_norm": 17.05795608124555, "learning_rate": 1.1849015953639576e-07, "loss": 0.9244, "step": 8253 }, { "epoch": 2.8132242672119974, "grad_norm": 14.491177995074944, "learning_rate": 1.1806138010988788e-07, "loss": 1.5347, "step": 8254 }, { "epoch": 2.8135650988411722, "grad_norm": 17.38111847487713, "learning_rate": 1.1763336863175879e-07, "loss": 1.8779, "step": 8255 }, { "epoch": 2.8139059304703475, "grad_norm": 14.405274727581789, "learning_rate": 1.1720612516933627e-07, "loss": 0.7092, "step": 8256 }, { "epoch": 2.814246762099523, "grad_norm": 30.04210428779782, "learning_rate": 1.1677964978982715e-07, "loss": 1.1057, "step": 8257 }, { "epoch": 2.814587593728698, "grad_norm": 17.045106264908963, "learning_rate": 1.1635394256031884e-07, "loss": 1.8347, "step": 8258 }, { "epoch": 2.8149284253578735, "grad_norm": 9.621178593425691, "learning_rate": 1.1592900354777559e-07, "loss": 1.2854, "step": 8259 }, { "epoch": 2.8152692569870483, "grad_norm": 10.385211192582364, "learning_rate": 1.1550483281904278e-07, "loss": 1.279, "step": 8260 }, { "epoch": 2.8156100886162236, "grad_norm": 15.956150354432507, "learning_rate": 1.1508143044084319e-07, "loss": 1.3826, "step": 8261 }, { "epoch": 2.815950920245399, "grad_norm": 15.979278255782432, "learning_rate": 1.1465879647978128e-07, "loss": 1.4937, "step": 8262 }, { "epoch": 2.816291751874574, "grad_norm": 11.57109807596676, "learning_rate": 1.1423693100233724e-07, "loss": 1.0153, "step": 8263 }, { "epoch": 2.816632583503749, "grad_norm": 14.014465300016395, "learning_rate": 1.1381583407487352e-07, "loss": 1.6701, "step": 8264 }, { "epoch": 2.8169734151329244, "grad_norm": 13.737091953530468, "learning_rate": 1.133955057636299e-07, "loss": 1.3068, "step": 8265 }, { "epoch": 2.8173142467620993, "grad_norm": 17.73479776885619, "learning_rate": 1.1297594613472518e-07, "loss": 1.7996, "step": 8266 }, { "epoch": 2.8176550783912746, "grad_norm": 11.466213184583415, "learning_rate": 1.1255715525415933e-07, "loss": 1.3864, "step": 8267 }, { "epoch": 2.81799591002045, "grad_norm": 13.97634262270077, "learning_rate": 1.12139133187808e-07, "loss": 1.5242, "step": 8268 }, { "epoch": 2.818336741649625, "grad_norm": 16.696560211728773, "learning_rate": 1.1172188000142803e-07, "loss": 1.7046, "step": 8269 }, { "epoch": 2.8186775732788005, "grad_norm": 16.288142354125686, "learning_rate": 1.1130539576065636e-07, "loss": 1.8449, "step": 8270 }, { "epoch": 2.8190184049079754, "grad_norm": 11.868367389931052, "learning_rate": 1.1088968053100613e-07, "loss": 0.88, "step": 8271 }, { "epoch": 2.8193592365371507, "grad_norm": 15.666214078014766, "learning_rate": 1.1047473437787171e-07, "loss": 1.1995, "step": 8272 }, { "epoch": 2.819700068166326, "grad_norm": 12.729830413381647, "learning_rate": 1.1006055736652532e-07, "loss": 1.5465, "step": 8273 }, { "epoch": 2.820040899795501, "grad_norm": 24.906200654543838, "learning_rate": 1.0964714956211931e-07, "loss": 1.7233, "step": 8274 }, { "epoch": 2.820381731424676, "grad_norm": 14.224330686401801, "learning_rate": 1.0923451102968386e-07, "loss": 1.3604, "step": 8275 }, { "epoch": 2.8207225630538515, "grad_norm": 15.730646404817547, "learning_rate": 1.0882264183412928e-07, "loss": 1.3671, "step": 8276 }, { "epoch": 2.8210633946830264, "grad_norm": 14.415213068503464, "learning_rate": 1.0841154204024318e-07, "loss": 1.145, "step": 8277 }, { "epoch": 2.8214042263122017, "grad_norm": 17.291162758536686, "learning_rate": 1.080012117126944e-07, "loss": 1.7495, "step": 8278 }, { "epoch": 2.821745057941377, "grad_norm": 14.502457458706052, "learning_rate": 1.0759165091602852e-07, "loss": 1.6, "step": 8279 }, { "epoch": 2.8220858895705523, "grad_norm": 14.89627779416749, "learning_rate": 1.071828597146718e-07, "loss": 1.2145, "step": 8280 }, { "epoch": 2.8224267211997276, "grad_norm": 14.047376237823281, "learning_rate": 1.0677483817292832e-07, "loss": 1.4936, "step": 8281 }, { "epoch": 2.8227675528289025, "grad_norm": 16.659148087463063, "learning_rate": 1.0636758635498123e-07, "loss": 1.0427, "step": 8282 }, { "epoch": 2.8231083844580778, "grad_norm": 15.018556627528238, "learning_rate": 1.059611043248937e-07, "loss": 1.4299, "step": 8283 }, { "epoch": 2.823449216087253, "grad_norm": 15.143827898203147, "learning_rate": 1.0555539214660682e-07, "loss": 1.5391, "step": 8284 }, { "epoch": 2.823790047716428, "grad_norm": 15.544348062914478, "learning_rate": 1.0515044988393897e-07, "loss": 0.8926, "step": 8285 }, { "epoch": 2.8241308793456033, "grad_norm": 27.050845986625262, "learning_rate": 1.0474627760059253e-07, "loss": 2.0421, "step": 8286 }, { "epoch": 2.8244717109747786, "grad_norm": 14.825691478261193, "learning_rate": 1.043428753601422e-07, "loss": 1.6738, "step": 8287 }, { "epoch": 2.8248125426039534, "grad_norm": 10.415440467379385, "learning_rate": 1.0394024322604667e-07, "loss": 0.9944, "step": 8288 }, { "epoch": 2.8251533742331287, "grad_norm": 20.613455407819032, "learning_rate": 1.0353838126164029e-07, "loss": 1.6449, "step": 8289 }, { "epoch": 2.825494205862304, "grad_norm": 18.689015411118376, "learning_rate": 1.0313728953013912e-07, "loss": 1.9172, "step": 8290 }, { "epoch": 2.8258350374914794, "grad_norm": 19.514347591201773, "learning_rate": 1.0273696809463496e-07, "loss": 1.4183, "step": 8291 }, { "epoch": 2.8261758691206547, "grad_norm": 14.974139679956037, "learning_rate": 1.0233741701810074e-07, "loss": 1.5062, "step": 8292 }, { "epoch": 2.8265167007498295, "grad_norm": 15.18985240138093, "learning_rate": 1.019386363633873e-07, "loss": 1.1013, "step": 8293 }, { "epoch": 2.826857532379005, "grad_norm": 25.074197869570547, "learning_rate": 1.0154062619322447e-07, "loss": 1.5517, "step": 8294 }, { "epoch": 2.82719836400818, "grad_norm": 15.513846613000494, "learning_rate": 1.011433865702205e-07, "loss": 1.0701, "step": 8295 }, { "epoch": 2.827539195637355, "grad_norm": 17.556290672867668, "learning_rate": 1.0074691755686372e-07, "loss": 1.0388, "step": 8296 }, { "epoch": 2.8278800272665303, "grad_norm": 20.1995249312285, "learning_rate": 1.0035121921551872e-07, "loss": 1.5416, "step": 8297 }, { "epoch": 2.8282208588957056, "grad_norm": 17.707479674864402, "learning_rate": 9.995629160843178e-08, "loss": 0.9837, "step": 8298 }, { "epoch": 2.8285616905248805, "grad_norm": 15.090604757449515, "learning_rate": 9.956213479772659e-08, "loss": 1.5884, "step": 8299 }, { "epoch": 2.828902522154056, "grad_norm": 16.30195152143396, "learning_rate": 9.916874884540461e-08, "loss": 1.576, "step": 8300 }, { "epoch": 2.829243353783231, "grad_norm": 19.678421240139624, "learning_rate": 9.87761338133475e-08, "loss": 1.387, "step": 8301 }, { "epoch": 2.8295841854124064, "grad_norm": 16.091595203328364, "learning_rate": 9.83842897633147e-08, "loss": 1.6368, "step": 8302 }, { "epoch": 2.8299250170415817, "grad_norm": 14.357861485478358, "learning_rate": 9.799321675694584e-08, "loss": 1.9884, "step": 8303 }, { "epoch": 2.8302658486707566, "grad_norm": 13.05494881426643, "learning_rate": 9.760291485575779e-08, "loss": 1.2804, "step": 8304 }, { "epoch": 2.830606680299932, "grad_norm": 12.771030112933394, "learning_rate": 9.721338412114645e-08, "loss": 1.2111, "step": 8305 }, { "epoch": 2.830947511929107, "grad_norm": 15.840166839983452, "learning_rate": 9.682462461438669e-08, "loss": 1.6872, "step": 8306 }, { "epoch": 2.831288343558282, "grad_norm": 13.143594235705484, "learning_rate": 9.643663639663181e-08, "loss": 1.5148, "step": 8307 }, { "epoch": 2.8316291751874574, "grad_norm": 11.323021583793322, "learning_rate": 9.604941952891411e-08, "loss": 0.8044, "step": 8308 }, { "epoch": 2.8319700068166327, "grad_norm": 17.243010781828, "learning_rate": 9.566297407214431e-08, "loss": 1.7713, "step": 8309 }, { "epoch": 2.8323108384458076, "grad_norm": 12.678492379208308, "learning_rate": 9.527730008711156e-08, "loss": 1.3411, "step": 8310 }, { "epoch": 2.832651670074983, "grad_norm": 15.656272679024646, "learning_rate": 9.489239763448399e-08, "loss": 1.5851, "step": 8311 }, { "epoch": 2.832992501704158, "grad_norm": 10.680814806126607, "learning_rate": 9.450826677480874e-08, "loss": 1.0473, "step": 8312 }, { "epoch": 2.8333333333333335, "grad_norm": 20.15711308337298, "learning_rate": 9.412490756851022e-08, "loss": 1.5046, "step": 8313 }, { "epoch": 2.833674164962509, "grad_norm": 18.31043386896112, "learning_rate": 9.374232007589302e-08, "loss": 1.8175, "step": 8314 }, { "epoch": 2.8340149965916837, "grad_norm": 20.80942623777544, "learning_rate": 9.336050435714006e-08, "loss": 1.8046, "step": 8315 }, { "epoch": 2.834355828220859, "grad_norm": 12.53499206989504, "learning_rate": 9.29794604723111e-08, "loss": 0.9842, "step": 8316 }, { "epoch": 2.8346966598500343, "grad_norm": 17.921046678034052, "learning_rate": 9.259918848134764e-08, "loss": 1.6165, "step": 8317 }, { "epoch": 2.835037491479209, "grad_norm": 17.174488169167674, "learning_rate": 9.221968844406681e-08, "loss": 1.9184, "step": 8318 }, { "epoch": 2.8353783231083844, "grad_norm": 17.88123888455869, "learning_rate": 9.184096042016533e-08, "loss": 1.3134, "step": 8319 }, { "epoch": 2.8357191547375598, "grad_norm": 11.897018175693635, "learning_rate": 9.146300446921941e-08, "loss": 1.2825, "step": 8320 }, { "epoch": 2.8360599863667346, "grad_norm": 16.597004628731124, "learning_rate": 9.10858206506826e-08, "loss": 1.5684, "step": 8321 }, { "epoch": 2.83640081799591, "grad_norm": 10.624192387809481, "learning_rate": 9.070940902388748e-08, "loss": 0.9301, "step": 8322 }, { "epoch": 2.8367416496250852, "grad_norm": 17.07181807443578, "learning_rate": 9.033376964804497e-08, "loss": 1.343, "step": 8323 }, { "epoch": 2.8370824812542605, "grad_norm": 21.229396559520367, "learning_rate": 8.995890258224449e-08, "loss": 1.0703, "step": 8324 }, { "epoch": 2.837423312883436, "grad_norm": 15.878518165004843, "learning_rate": 8.958480788545499e-08, "loss": 1.1107, "step": 8325 }, { "epoch": 2.8377641445126107, "grad_norm": 17.04258312775287, "learning_rate": 8.921148561652216e-08, "loss": 1.946, "step": 8326 }, { "epoch": 2.838104976141786, "grad_norm": 15.988715966711233, "learning_rate": 8.883893583417125e-08, "loss": 1.7105, "step": 8327 }, { "epoch": 2.8384458077709613, "grad_norm": 18.13675934950191, "learning_rate": 8.846715859700595e-08, "loss": 2.0403, "step": 8328 }, { "epoch": 2.838786639400136, "grad_norm": 12.557680083795747, "learning_rate": 8.80961539635078e-08, "loss": 0.9557, "step": 8329 }, { "epoch": 2.8391274710293115, "grad_norm": 16.315648952971717, "learning_rate": 8.7725921992039e-08, "loss": 1.1675, "step": 8330 }, { "epoch": 2.839468302658487, "grad_norm": 14.673537556478621, "learning_rate": 8.735646274083631e-08, "loss": 1.3642, "step": 8331 }, { "epoch": 2.8398091342876617, "grad_norm": 16.493782441670522, "learning_rate": 8.698777626801824e-08, "loss": 1.8308, "step": 8332 }, { "epoch": 2.840149965916837, "grad_norm": 11.596640413292967, "learning_rate": 8.661986263158118e-08, "loss": 0.5732, "step": 8333 }, { "epoch": 2.8404907975460123, "grad_norm": 13.52512298627983, "learning_rate": 8.625272188939826e-08, "loss": 1.9182, "step": 8334 }, { "epoch": 2.8408316291751876, "grad_norm": 17.886532232614073, "learning_rate": 8.588635409922219e-08, "loss": 1.8457, "step": 8335 }, { "epoch": 2.841172460804363, "grad_norm": 19.81683975047375, "learning_rate": 8.552075931868576e-08, "loss": 1.3658, "step": 8336 }, { "epoch": 2.841513292433538, "grad_norm": 12.011842183621471, "learning_rate": 8.515593760529628e-08, "loss": 1.4196, "step": 8337 }, { "epoch": 2.841854124062713, "grad_norm": 16.02533976649134, "learning_rate": 8.479188901644286e-08, "loss": 1.0307, "step": 8338 }, { "epoch": 2.8421949556918884, "grad_norm": 15.333089956009555, "learning_rate": 8.442861360939192e-08, "loss": 1.4138, "step": 8339 }, { "epoch": 2.8425357873210633, "grad_norm": 24.171611145420794, "learning_rate": 8.406611144128774e-08, "loss": 1.5787, "step": 8340 }, { "epoch": 2.8428766189502386, "grad_norm": 10.750314781082961, "learning_rate": 8.370438256915359e-08, "loss": 0.8305, "step": 8341 }, { "epoch": 2.843217450579414, "grad_norm": 15.65574273137029, "learning_rate": 8.334342704989006e-08, "loss": 1.1973, "step": 8342 }, { "epoch": 2.8435582822085887, "grad_norm": 19.989696539145793, "learning_rate": 8.298324494027788e-08, "loss": 1.4668, "step": 8343 }, { "epoch": 2.843899113837764, "grad_norm": 14.389069255908973, "learning_rate": 8.262383629697502e-08, "loss": 1.6472, "step": 8344 }, { "epoch": 2.8442399454669394, "grad_norm": 13.523646052040549, "learning_rate": 8.226520117651737e-08, "loss": 1.3953, "step": 8345 }, { "epoch": 2.8445807770961147, "grad_norm": 15.999642880604428, "learning_rate": 8.190733963532038e-08, "loss": 1.4307, "step": 8346 }, { "epoch": 2.8449216087252895, "grad_norm": 10.267150892893753, "learning_rate": 8.155025172967624e-08, "loss": 1.2, "step": 8347 }, { "epoch": 2.845262440354465, "grad_norm": 13.898784335695028, "learning_rate": 8.119393751575666e-08, "loss": 1.311, "step": 8348 }, { "epoch": 2.84560327198364, "grad_norm": 14.82572689515187, "learning_rate": 8.083839704961183e-08, "loss": 1.3334, "step": 8349 }, { "epoch": 2.8459441036128155, "grad_norm": 11.613988523296225, "learning_rate": 8.048363038716866e-08, "loss": 1.5931, "step": 8350 }, { "epoch": 2.8462849352419903, "grad_norm": 17.358331479730804, "learning_rate": 8.01296375842342e-08, "loss": 1.0894, "step": 8351 }, { "epoch": 2.8466257668711656, "grad_norm": 15.472521868856697, "learning_rate": 7.977641869649278e-08, "loss": 1.9457, "step": 8352 }, { "epoch": 2.846966598500341, "grad_norm": 13.573665519936068, "learning_rate": 7.942397377950606e-08, "loss": 1.0465, "step": 8353 }, { "epoch": 2.847307430129516, "grad_norm": 16.1660170725371, "learning_rate": 7.907230288871748e-08, "loss": 1.5724, "step": 8354 }, { "epoch": 2.847648261758691, "grad_norm": 16.574719937623822, "learning_rate": 7.872140607944334e-08, "loss": 1.8747, "step": 8355 }, { "epoch": 2.8479890933878664, "grad_norm": 17.750565688127494, "learning_rate": 7.837128340688338e-08, "loss": 1.8695, "step": 8356 }, { "epoch": 2.8483299250170417, "grad_norm": 24.658080806646137, "learning_rate": 7.802193492611187e-08, "loss": 1.4672, "step": 8357 }, { "epoch": 2.8486707566462166, "grad_norm": 14.19087347699965, "learning_rate": 7.76733606920832e-08, "loss": 1.2929, "step": 8358 }, { "epoch": 2.849011588275392, "grad_norm": 15.426612604376055, "learning_rate": 7.732556075963016e-08, "loss": 1.5222, "step": 8359 }, { "epoch": 2.8493524199045672, "grad_norm": 16.669407403149016, "learning_rate": 7.697853518346176e-08, "loss": 1.4241, "step": 8360 }, { "epoch": 2.8496932515337425, "grad_norm": 22.85129948805778, "learning_rate": 7.663228401816768e-08, "loss": 1.2504, "step": 8361 }, { "epoch": 2.8500340831629174, "grad_norm": 12.283543098115507, "learning_rate": 7.62868073182138e-08, "loss": 1.3587, "step": 8362 }, { "epoch": 2.8503749147920927, "grad_norm": 17.9764453170893, "learning_rate": 7.594210513794553e-08, "loss": 1.0755, "step": 8363 }, { "epoch": 2.850715746421268, "grad_norm": 27.26723481640397, "learning_rate": 7.55981775315856e-08, "loss": 1.0965, "step": 8364 }, { "epoch": 2.851056578050443, "grad_norm": 18.236676620028383, "learning_rate": 7.525502455323575e-08, "loss": 1.1992, "step": 8365 }, { "epoch": 2.851397409679618, "grad_norm": 25.42849329023096, "learning_rate": 7.491264625687389e-08, "loss": 1.9462, "step": 8366 }, { "epoch": 2.8517382413087935, "grad_norm": 13.086045567024971, "learning_rate": 7.457104269635917e-08, "loss": 1.0996, "step": 8367 }, { "epoch": 2.852079072937969, "grad_norm": 14.995950298016279, "learning_rate": 7.423021392542585e-08, "loss": 1.3162, "step": 8368 }, { "epoch": 2.8524199045671437, "grad_norm": 18.995435525359163, "learning_rate": 7.389015999768823e-08, "loss": 1.3321, "step": 8369 }, { "epoch": 2.852760736196319, "grad_norm": 23.75078576257384, "learning_rate": 7.355088096663854e-08, "loss": 1.8234, "step": 8370 }, { "epoch": 2.8531015678254943, "grad_norm": 16.920289408859034, "learning_rate": 7.32123768856463e-08, "loss": 1.1213, "step": 8371 }, { "epoch": 2.8534423994546696, "grad_norm": 16.449804736985566, "learning_rate": 7.28746478079595e-08, "loss": 1.6279, "step": 8372 }, { "epoch": 2.8537832310838445, "grad_norm": 13.781941499752453, "learning_rate": 7.253769378670395e-08, "loss": 1.3934, "step": 8373 }, { "epoch": 2.8541240627130198, "grad_norm": 14.062293388327198, "learning_rate": 7.220151487488447e-08, "loss": 1.7862, "step": 8374 }, { "epoch": 2.854464894342195, "grad_norm": 21.443078064176632, "learning_rate": 7.186611112538323e-08, "loss": 1.5241, "step": 8375 }, { "epoch": 2.85480572597137, "grad_norm": 13.730903088028882, "learning_rate": 7.153148259096076e-08, "loss": 1.3361, "step": 8376 }, { "epoch": 2.8551465576005453, "grad_norm": 18.061306798848523, "learning_rate": 7.1197629324255e-08, "loss": 1.1922, "step": 8377 }, { "epoch": 2.8554873892297206, "grad_norm": 17.435140943622972, "learning_rate": 7.086455137778226e-08, "loss": 1.9982, "step": 8378 }, { "epoch": 2.855828220858896, "grad_norm": 17.43812622064147, "learning_rate": 7.053224880393783e-08, "loss": 1.5015, "step": 8379 }, { "epoch": 2.8561690524880707, "grad_norm": 14.484763355626791, "learning_rate": 7.020072165499381e-08, "loss": 1.1332, "step": 8380 }, { "epoch": 2.856509884117246, "grad_norm": 15.129067853163166, "learning_rate": 6.986996998310014e-08, "loss": 0.8882, "step": 8381 }, { "epoch": 2.8568507157464214, "grad_norm": 15.693009268057772, "learning_rate": 6.953999384028576e-08, "loss": 1.8093, "step": 8382 }, { "epoch": 2.8571915473755967, "grad_norm": 18.252988495944656, "learning_rate": 6.921079327845803e-08, "loss": 1.8729, "step": 8383 }, { "epoch": 2.8575323790047715, "grad_norm": 19.200546462843807, "learning_rate": 6.888236834940054e-08, "loss": 1.3218, "step": 8384 }, { "epoch": 2.857873210633947, "grad_norm": 17.120965142077626, "learning_rate": 6.855471910477641e-08, "loss": 1.5881, "step": 8385 }, { "epoch": 2.858214042263122, "grad_norm": 10.921803401348706, "learning_rate": 6.82278455961255e-08, "loss": 1.1725, "step": 8386 }, { "epoch": 2.858554873892297, "grad_norm": 14.040404196909988, "learning_rate": 6.790174787486669e-08, "loss": 1.6296, "step": 8387 }, { "epoch": 2.8588957055214723, "grad_norm": 12.650158729062975, "learning_rate": 6.757642599229674e-08, "loss": 1.5884, "step": 8388 }, { "epoch": 2.8592365371506476, "grad_norm": 13.540435806499223, "learning_rate": 6.725187999959026e-08, "loss": 1.397, "step": 8389 }, { "epoch": 2.8595773687798225, "grad_norm": 18.846869720595244, "learning_rate": 6.692810994779863e-08, "loss": 1.3682, "step": 8390 }, { "epoch": 2.859918200408998, "grad_norm": 14.882241801702468, "learning_rate": 6.660511588785279e-08, "loss": 0.6665, "step": 8391 }, { "epoch": 2.860259032038173, "grad_norm": 13.907625050639338, "learning_rate": 6.628289787056096e-08, "loss": 1.3159, "step": 8392 }, { "epoch": 2.8605998636673484, "grad_norm": 14.607263227885426, "learning_rate": 6.59614559466093e-08, "loss": 1.2993, "step": 8393 }, { "epoch": 2.8609406952965237, "grad_norm": 15.764255240668708, "learning_rate": 6.564079016656177e-08, "loss": 1.7458, "step": 8394 }, { "epoch": 2.8612815269256986, "grad_norm": 14.167157653780208, "learning_rate": 6.532090058086027e-08, "loss": 1.3899, "step": 8395 }, { "epoch": 2.861622358554874, "grad_norm": 20.895547809634046, "learning_rate": 6.500178723982564e-08, "loss": 1.0913, "step": 8396 }, { "epoch": 2.861963190184049, "grad_norm": 17.63280568306113, "learning_rate": 6.46834501936544e-08, "loss": 1.7421, "step": 8397 }, { "epoch": 2.862304021813224, "grad_norm": 12.860577008428628, "learning_rate": 6.436588949242262e-08, "loss": 1.3548, "step": 8398 }, { "epoch": 2.8626448534423994, "grad_norm": 29.00002250632659, "learning_rate": 6.404910518608476e-08, "loss": 1.2711, "step": 8399 }, { "epoch": 2.8629856850715747, "grad_norm": 15.03024202841385, "learning_rate": 6.373309732447097e-08, "loss": 0.958, "step": 8400 }, { "epoch": 2.8633265167007496, "grad_norm": 12.775467534976185, "learning_rate": 6.34178659572915e-08, "loss": 1.056, "step": 8401 }, { "epoch": 2.863667348329925, "grad_norm": 21.989257551230654, "learning_rate": 6.310341113413332e-08, "loss": 1.4024, "step": 8402 }, { "epoch": 2.8640081799591, "grad_norm": 15.791522649363761, "learning_rate": 6.278973290446022e-08, "loss": 1.5924, "step": 8403 }, { "epoch": 2.8643490115882755, "grad_norm": 16.68601879762969, "learning_rate": 6.247683131761717e-08, "loss": 1.6535, "step": 8404 }, { "epoch": 2.864689843217451, "grad_norm": 12.553769938935936, "learning_rate": 6.216470642282313e-08, "loss": 1.096, "step": 8405 }, { "epoch": 2.8650306748466257, "grad_norm": 43.71729096531252, "learning_rate": 6.185335826917715e-08, "loss": 1.5698, "step": 8406 }, { "epoch": 2.865371506475801, "grad_norm": 20.96630215946244, "learning_rate": 6.154278690565562e-08, "loss": 1.5897, "step": 8407 }, { "epoch": 2.8657123381049763, "grad_norm": 31.48575750178133, "learning_rate": 6.123299238111224e-08, "loss": 1.3451, "step": 8408 }, { "epoch": 2.866053169734151, "grad_norm": 13.216202007060252, "learning_rate": 6.092397474427969e-08, "loss": 0.8031, "step": 8409 }, { "epoch": 2.8663940013633264, "grad_norm": 17.601480750886914, "learning_rate": 6.061573404376742e-08, "loss": 1.6364, "step": 8410 }, { "epoch": 2.8667348329925018, "grad_norm": 19.1304943379796, "learning_rate": 6.030827032806219e-08, "loss": 1.4749, "step": 8411 }, { "epoch": 2.8670756646216766, "grad_norm": 18.452779646493852, "learning_rate": 6.000158364552977e-08, "loss": 1.1529, "step": 8412 }, { "epoch": 2.867416496250852, "grad_norm": 14.69799375569531, "learning_rate": 5.969567404441379e-08, "loss": 1.4858, "step": 8413 }, { "epoch": 2.8677573278800272, "grad_norm": 18.829827877976776, "learning_rate": 5.939054157283464e-08, "loss": 1.5336, "step": 8414 }, { "epoch": 2.8680981595092025, "grad_norm": 15.532524599263466, "learning_rate": 5.908618627879004e-08, "loss": 1.4543, "step": 8415 }, { "epoch": 2.868438991138378, "grad_norm": 13.786290107010572, "learning_rate": 5.878260821015669e-08, "loss": 1.3437, "step": 8416 }, { "epoch": 2.8687798227675527, "grad_norm": 15.745544387797732, "learning_rate": 5.8479807414690284e-08, "loss": 1.1697, "step": 8417 }, { "epoch": 2.869120654396728, "grad_norm": 16.718101485207587, "learning_rate": 5.817778394001994e-08, "loss": 1.5091, "step": 8418 }, { "epoch": 2.8694614860259033, "grad_norm": 13.710174860414487, "learning_rate": 5.7876537833657097e-08, "loss": 0.9766, "step": 8419 }, { "epoch": 2.869802317655078, "grad_norm": 46.733975825429276, "learning_rate": 5.75760691429883e-08, "loss": 1.3394, "step": 8420 }, { "epoch": 2.8701431492842535, "grad_norm": 13.798049631051926, "learning_rate": 5.7276377915277956e-08, "loss": 1.8173, "step": 8421 }, { "epoch": 2.870483980913429, "grad_norm": 15.921797845236066, "learning_rate": 5.6977464197669475e-08, "loss": 1.252, "step": 8422 }, { "epoch": 2.8708248125426037, "grad_norm": 22.802474215951676, "learning_rate": 5.667932803718301e-08, "loss": 1.2649, "step": 8423 }, { "epoch": 2.871165644171779, "grad_norm": 22.795079629571035, "learning_rate": 5.638196948071606e-08, "loss": 1.2721, "step": 8424 }, { "epoch": 2.8715064758009543, "grad_norm": 14.27778974974975, "learning_rate": 5.608538857504564e-08, "loss": 1.0583, "step": 8425 }, { "epoch": 2.8718473074301296, "grad_norm": 21.309088069398737, "learning_rate": 5.5789585366823885e-08, "loss": 1.6431, "step": 8426 }, { "epoch": 2.872188139059305, "grad_norm": 15.105806623243124, "learning_rate": 5.5494559902581345e-08, "loss": 1.4269, "step": 8427 }, { "epoch": 2.87252897068848, "grad_norm": 11.70790203576845, "learning_rate": 5.520031222872812e-08, "loss": 1.1219, "step": 8428 }, { "epoch": 2.872869802317655, "grad_norm": 16.490669975041634, "learning_rate": 5.490684239154997e-08, "loss": 0.9806, "step": 8429 }, { "epoch": 2.8732106339468304, "grad_norm": 17.252104311707065, "learning_rate": 5.461415043721108e-08, "loss": 1.4795, "step": 8430 }, { "epoch": 2.8735514655760053, "grad_norm": 16.00800127097942, "learning_rate": 5.4322236411751825e-08, "loss": 1.955, "step": 8431 }, { "epoch": 2.8738922972051806, "grad_norm": 15.180145136007328, "learning_rate": 5.4031100361092714e-08, "loss": 0.7515, "step": 8432 }, { "epoch": 2.874233128834356, "grad_norm": 17.32183409083827, "learning_rate": 5.374074233103044e-08, "loss": 1.58, "step": 8433 }, { "epoch": 2.8745739604635308, "grad_norm": 14.929315777634194, "learning_rate": 5.345116236723902e-08, "loss": 1.7061, "step": 8434 }, { "epoch": 2.874914792092706, "grad_norm": 16.15255451062592, "learning_rate": 5.316236051527146e-08, "loss": 1.7457, "step": 8435 }, { "epoch": 2.8752556237218814, "grad_norm": 18.288367259893022, "learning_rate": 5.2874336820556427e-08, "loss": 1.1903, "step": 8436 }, { "epoch": 2.8755964553510567, "grad_norm": 19.004901507394745, "learning_rate": 5.258709132840156e-08, "loss": 1.8613, "step": 8437 }, { "epoch": 2.875937286980232, "grad_norm": 14.647524423471973, "learning_rate": 5.230062408399183e-08, "loss": 1.4736, "step": 8438 }, { "epoch": 2.876278118609407, "grad_norm": 19.067604844223652, "learning_rate": 5.201493513238953e-08, "loss": 1.2344, "step": 8439 }, { "epoch": 2.876618950238582, "grad_norm": 18.206570411526027, "learning_rate": 5.173002451853482e-08, "loss": 0.948, "step": 8440 }, { "epoch": 2.8769597818677575, "grad_norm": 15.659666689476822, "learning_rate": 5.1445892287245166e-08, "loss": 1.4352, "step": 8441 }, { "epoch": 2.8773006134969323, "grad_norm": 18.68152811873611, "learning_rate": 5.1162538483215393e-08, "loss": 1.9717, "step": 8442 }, { "epoch": 2.8776414451261076, "grad_norm": 14.854479237855594, "learning_rate": 5.087996315101873e-08, "loss": 1.264, "step": 8443 }, { "epoch": 2.877982276755283, "grad_norm": 17.034715399072848, "learning_rate": 5.059816633510517e-08, "loss": 1.5506, "step": 8444 }, { "epoch": 2.878323108384458, "grad_norm": 18.802915383985052, "learning_rate": 5.0317148079802594e-08, "loss": 1.5393, "step": 8445 }, { "epoch": 2.878663940013633, "grad_norm": 19.171390951841314, "learning_rate": 5.0036908429315636e-08, "loss": 1.7684, "step": 8446 }, { "epoch": 2.8790047716428084, "grad_norm": 18.748907409648105, "learning_rate": 4.975744742772848e-08, "loss": 1.7856, "step": 8447 }, { "epoch": 2.8793456032719837, "grad_norm": 16.503738215868566, "learning_rate": 4.947876511899985e-08, "loss": 1.5649, "step": 8448 }, { "epoch": 2.879686434901159, "grad_norm": 17.651059517218684, "learning_rate": 4.920086154696857e-08, "loss": 1.2485, "step": 8449 }, { "epoch": 2.880027266530334, "grad_norm": 30.757221196132544, "learning_rate": 4.8923736755350224e-08, "loss": 1.0016, "step": 8450 }, { "epoch": 2.8803680981595092, "grad_norm": 11.18420916240282, "learning_rate": 4.864739078773717e-08, "loss": 1.3712, "step": 8451 }, { "epoch": 2.8807089297886845, "grad_norm": 13.885192588165802, "learning_rate": 4.837182368759907e-08, "loss": 1.8337, "step": 8452 }, { "epoch": 2.8810497614178594, "grad_norm": 9.947024151879797, "learning_rate": 4.809703549828459e-08, "loss": 0.9492, "step": 8453 }, { "epoch": 2.8813905930470347, "grad_norm": 21.109868762671073, "learning_rate": 4.7823026263019155e-08, "loss": 1.2386, "step": 8454 }, { "epoch": 2.88173142467621, "grad_norm": 14.418995757925682, "learning_rate": 4.7549796024904946e-08, "loss": 1.1359, "step": 8455 }, { "epoch": 2.882072256305385, "grad_norm": 16.8203590906096, "learning_rate": 4.727734482692259e-08, "loss": 1.1043, "step": 8456 }, { "epoch": 2.88241308793456, "grad_norm": 23.599184348212734, "learning_rate": 4.7005672711929464e-08, "loss": 1.2202, "step": 8457 }, { "epoch": 2.8827539195637355, "grad_norm": 14.527696422684436, "learning_rate": 4.6734779722660274e-08, "loss": 1.6076, "step": 8458 }, { "epoch": 2.883094751192911, "grad_norm": 20.46979457770489, "learning_rate": 4.646466590172871e-08, "loss": 1.6775, "step": 8459 }, { "epoch": 2.883435582822086, "grad_norm": 11.993145930162838, "learning_rate": 4.619533129162357e-08, "loss": 1.3565, "step": 8460 }, { "epoch": 2.883776414451261, "grad_norm": 14.577269347944524, "learning_rate": 4.5926775934713175e-08, "loss": 1.4573, "step": 8461 }, { "epoch": 2.8841172460804363, "grad_norm": 11.051753112229575, "learning_rate": 4.565899987324152e-08, "loss": 1.1118, "step": 8462 }, { "epoch": 2.8844580777096116, "grad_norm": 19.25229714432816, "learning_rate": 4.539200314933156e-08, "loss": 1.9694, "step": 8463 }, { "epoch": 2.8847989093387865, "grad_norm": 22.528393283329194, "learning_rate": 4.512578580498306e-08, "loss": 1.3606, "step": 8464 }, { "epoch": 2.8851397409679618, "grad_norm": 9.915647970588964, "learning_rate": 4.486034788207194e-08, "loss": 1.0066, "step": 8465 }, { "epoch": 2.885480572597137, "grad_norm": 13.70683682775047, "learning_rate": 4.4595689422353704e-08, "loss": 1.0109, "step": 8466 }, { "epoch": 2.885821404226312, "grad_norm": 21.538262101207845, "learning_rate": 4.433181046746005e-08, "loss": 1.6137, "step": 8467 }, { "epoch": 2.8861622358554873, "grad_norm": 14.04998604676097, "learning_rate": 4.406871105889943e-08, "loss": 1.3961, "step": 8468 }, { "epoch": 2.8865030674846626, "grad_norm": 12.36013019316959, "learning_rate": 4.3806391238059296e-08, "loss": 1.3811, "step": 8469 }, { "epoch": 2.886843899113838, "grad_norm": 20.258866757178073, "learning_rate": 4.354485104620332e-08, "loss": 2.1531, "step": 8470 }, { "epoch": 2.887184730743013, "grad_norm": 10.456018928775213, "learning_rate": 4.3284090524472466e-08, "loss": 1.2759, "step": 8471 }, { "epoch": 2.887525562372188, "grad_norm": 10.075969701024677, "learning_rate": 4.302410971388615e-08, "loss": 0.9555, "step": 8472 }, { "epoch": 2.8878663940013634, "grad_norm": 14.843440532229327, "learning_rate": 4.276490865534e-08, "loss": 1.2165, "step": 8473 }, { "epoch": 2.8882072256305387, "grad_norm": 15.482083939334546, "learning_rate": 4.250648738960694e-08, "loss": 1.3199, "step": 8474 }, { "epoch": 2.8885480572597135, "grad_norm": 29.802202730175694, "learning_rate": 4.224884595733836e-08, "loss": 0.9042, "step": 8475 }, { "epoch": 2.888888888888889, "grad_norm": 14.114660527238856, "learning_rate": 4.199198439906182e-08, "loss": 1.6313, "step": 8476 }, { "epoch": 2.889229720518064, "grad_norm": 15.047302141868467, "learning_rate": 4.17359027551828e-08, "loss": 1.8258, "step": 8477 }, { "epoch": 2.889570552147239, "grad_norm": 12.733770833388384, "learning_rate": 4.148060106598406e-08, "loss": 1.0744, "step": 8478 }, { "epoch": 2.8899113837764143, "grad_norm": 13.38530820636626, "learning_rate": 4.122607937162515e-08, "loss": 1.0907, "step": 8479 }, { "epoch": 2.8902522154055896, "grad_norm": 13.599509872446655, "learning_rate": 4.097233771214404e-08, "loss": 1.2826, "step": 8480 }, { "epoch": 2.890593047034765, "grad_norm": 16.63642288491717, "learning_rate": 4.071937612745491e-08, "loss": 1.7476, "step": 8481 }, { "epoch": 2.8909338786639402, "grad_norm": 12.737215660833234, "learning_rate": 4.046719465734983e-08, "loss": 1.3849, "step": 8482 }, { "epoch": 2.891274710293115, "grad_norm": 14.571529202143621, "learning_rate": 4.021579334149761e-08, "loss": 0.9629, "step": 8483 }, { "epoch": 2.8916155419222904, "grad_norm": 17.106338506945786, "learning_rate": 3.996517221944496e-08, "loss": 1.2462, "step": 8484 }, { "epoch": 2.8919563735514657, "grad_norm": 18.001141819809217, "learning_rate": 3.971533133061589e-08, "loss": 1.62, "step": 8485 }, { "epoch": 2.8922972051806406, "grad_norm": 12.358786336266135, "learning_rate": 3.946627071431008e-08, "loss": 1.5734, "step": 8486 }, { "epoch": 2.892638036809816, "grad_norm": 15.711485580426448, "learning_rate": 3.921799040970675e-08, "loss": 1.766, "step": 8487 }, { "epoch": 2.892978868438991, "grad_norm": 18.224574186058202, "learning_rate": 3.897049045586188e-08, "loss": 1.8236, "step": 8488 }, { "epoch": 2.893319700068166, "grad_norm": 16.40398090204466, "learning_rate": 3.872377089170709e-08, "loss": 1.4799, "step": 8489 }, { "epoch": 2.8936605316973414, "grad_norm": 20.674135557302606, "learning_rate": 3.847783175605302e-08, "loss": 1.293, "step": 8490 }, { "epoch": 2.8940013633265167, "grad_norm": 16.047946873696606, "learning_rate": 3.823267308758649e-08, "loss": 1.171, "step": 8491 }, { "epoch": 2.894342194955692, "grad_norm": 13.109156077111171, "learning_rate": 3.7988294924872215e-08, "loss": 1.2528, "step": 8492 }, { "epoch": 2.8946830265848673, "grad_norm": 19.906350591462743, "learning_rate": 3.774469730635222e-08, "loss": 1.1636, "step": 8493 }, { "epoch": 2.895023858214042, "grad_norm": 18.012659282768993, "learning_rate": 3.7501880270344735e-08, "loss": 1.8389, "step": 8494 }, { "epoch": 2.8953646898432175, "grad_norm": 16.828929448443237, "learning_rate": 3.725984385504588e-08, "loss": 1.8891, "step": 8495 }, { "epoch": 2.895705521472393, "grad_norm": 12.24292900037543, "learning_rate": 3.70185880985291e-08, "loss": 1.5868, "step": 8496 }, { "epoch": 2.8960463531015677, "grad_norm": 14.774360875630633, "learning_rate": 3.677811303874457e-08, "loss": 1.465, "step": 8497 }, { "epoch": 2.896387184730743, "grad_norm": 13.674827742775463, "learning_rate": 3.653841871352093e-08, "loss": 1.3467, "step": 8498 }, { "epoch": 2.8967280163599183, "grad_norm": 15.724302237650683, "learning_rate": 3.629950516056191e-08, "loss": 1.7879, "step": 8499 }, { "epoch": 2.897068847989093, "grad_norm": 12.59241821210436, "learning_rate": 3.606137241745022e-08, "loss": 0.9418, "step": 8500 }, { "epoch": 2.8974096796182685, "grad_norm": 17.533075296230663, "learning_rate": 3.582402052164535e-08, "loss": 1.8739, "step": 8501 }, { "epoch": 2.8977505112474438, "grad_norm": 17.35371864223056, "learning_rate": 3.558744951048243e-08, "loss": 1.4909, "step": 8502 }, { "epoch": 2.898091342876619, "grad_norm": 9.785790039280375, "learning_rate": 3.535165942117669e-08, "loss": 1.2014, "step": 8503 }, { "epoch": 2.8984321745057944, "grad_norm": 19.321648524781175, "learning_rate": 3.51166502908179e-08, "loss": 1.5066, "step": 8504 }, { "epoch": 2.8987730061349692, "grad_norm": 13.825923888380867, "learning_rate": 3.488242215637372e-08, "loss": 1.62, "step": 8505 }, { "epoch": 2.8991138377641446, "grad_norm": 17.232753049977433, "learning_rate": 3.464897505469023e-08, "loss": 1.408, "step": 8506 }, { "epoch": 2.89945466939332, "grad_norm": 11.389547531028953, "learning_rate": 3.441630902248805e-08, "loss": 1.2117, "step": 8507 }, { "epoch": 2.8997955010224947, "grad_norm": 13.512811725861628, "learning_rate": 3.4184424096367904e-08, "loss": 0.9783, "step": 8508 }, { "epoch": 2.90013633265167, "grad_norm": 20.789827332127615, "learning_rate": 3.3953320312805074e-08, "loss": 1.7075, "step": 8509 }, { "epoch": 2.9004771642808453, "grad_norm": 18.698878012710562, "learning_rate": 3.372299770815435e-08, "loss": 2.1461, "step": 8510 }, { "epoch": 2.90081799591002, "grad_norm": 16.390705359776057, "learning_rate": 3.349345631864509e-08, "loss": 1.3143, "step": 8511 }, { "epoch": 2.9011588275391955, "grad_norm": 14.154075051152994, "learning_rate": 3.3264696180385635e-08, "loss": 1.3235, "step": 8512 }, { "epoch": 2.901499659168371, "grad_norm": 16.300364181886565, "learning_rate": 3.303671732936109e-08, "loss": 1.3438, "step": 8513 }, { "epoch": 2.901840490797546, "grad_norm": 11.488865756926726, "learning_rate": 3.280951980143332e-08, "loss": 1.1084, "step": 8514 }, { "epoch": 2.9021813224267214, "grad_norm": 16.774047400693387, "learning_rate": 3.258310363234152e-08, "loss": 2.0656, "step": 8515 }, { "epoch": 2.9025221540558963, "grad_norm": 16.420893179179167, "learning_rate": 3.235746885770163e-08, "loss": 1.7141, "step": 8516 }, { "epoch": 2.9028629856850716, "grad_norm": 11.226292248323459, "learning_rate": 3.213261551300639e-08, "loss": 1.0448, "step": 8517 }, { "epoch": 2.903203817314247, "grad_norm": 23.70300515981716, "learning_rate": 3.190854363362694e-08, "loss": 1.2234, "step": 8518 }, { "epoch": 2.903544648943422, "grad_norm": 24.827698303546647, "learning_rate": 3.1685253254810645e-08, "loss": 1.5049, "step": 8519 }, { "epoch": 2.903885480572597, "grad_norm": 19.107373633678275, "learning_rate": 3.146274441168162e-08, "loss": 1.4261, "step": 8520 }, { "epoch": 2.9042263122017724, "grad_norm": 16.252311888807718, "learning_rate": 3.124101713924133e-08, "loss": 1.8897, "step": 8521 }, { "epoch": 2.9045671438309473, "grad_norm": 16.486816482042176, "learning_rate": 3.1020071472369074e-08, "loss": 1.2799, "step": 8522 }, { "epoch": 2.9049079754601226, "grad_norm": 14.37740480419204, "learning_rate": 3.0799907445819286e-08, "loss": 1.0299, "step": 8523 }, { "epoch": 2.905248807089298, "grad_norm": 15.077652299601008, "learning_rate": 3.058052509422538e-08, "loss": 1.9165, "step": 8524 }, { "epoch": 2.905589638718473, "grad_norm": 20.366257358359626, "learning_rate": 3.0361924452097516e-08, "loss": 1.5694, "step": 8525 }, { "epoch": 2.9059304703476485, "grad_norm": 13.224864017487441, "learning_rate": 3.014410555382097e-08, "loss": 1.1462, "step": 8526 }, { "epoch": 2.9062713019768234, "grad_norm": 16.478616282026422, "learning_rate": 2.992706843366167e-08, "loss": 1.5697, "step": 8527 }, { "epoch": 2.9066121336059987, "grad_norm": 32.04292326173052, "learning_rate": 2.971081312575841e-08, "loss": 0.809, "step": 8528 }, { "epoch": 2.906952965235174, "grad_norm": 26.044509143206522, "learning_rate": 2.9495339664130095e-08, "loss": 0.9337, "step": 8529 }, { "epoch": 2.907293796864349, "grad_norm": 15.356498881595268, "learning_rate": 2.9280648082670726e-08, "loss": 1.463, "step": 8530 }, { "epoch": 2.907634628493524, "grad_norm": 18.38807079594237, "learning_rate": 2.9066738415153285e-08, "loss": 1.8922, "step": 8531 }, { "epoch": 2.9079754601226995, "grad_norm": 21.210370833842354, "learning_rate": 2.8853610695226408e-08, "loss": 1.6522, "step": 8532 }, { "epoch": 2.9083162917518743, "grad_norm": 19.794760014632708, "learning_rate": 2.864126495641495e-08, "loss": 1.6261, "step": 8533 }, { "epoch": 2.9086571233810496, "grad_norm": 29.075158760591723, "learning_rate": 2.8429701232122187e-08, "loss": 2.0802, "step": 8534 }, { "epoch": 2.908997955010225, "grad_norm": 16.663585844025008, "learning_rate": 2.8218919555628722e-08, "loss": 1.963, "step": 8535 }, { "epoch": 2.9093387866394003, "grad_norm": 18.485579846962164, "learning_rate": 2.8008919960090253e-08, "loss": 1.2401, "step": 8536 }, { "epoch": 2.9096796182685756, "grad_norm": 12.766100254135631, "learning_rate": 2.7799702478540914e-08, "loss": 0.9483, "step": 8537 }, { "epoch": 2.9100204498977504, "grad_norm": 9.667753034189479, "learning_rate": 2.759126714389215e-08, "loss": 0.9948, "step": 8538 }, { "epoch": 2.9103612815269257, "grad_norm": 15.48795130757094, "learning_rate": 2.7383613988930525e-08, "loss": 0.7564, "step": 8539 }, { "epoch": 2.910702113156101, "grad_norm": 18.57032994633064, "learning_rate": 2.717674304632156e-08, "loss": 1.7795, "step": 8540 }, { "epoch": 2.911042944785276, "grad_norm": 12.046281011856411, "learning_rate": 2.697065434860646e-08, "loss": 1.5831, "step": 8541 }, { "epoch": 2.9113837764144512, "grad_norm": 19.514470906902257, "learning_rate": 2.6765347928203734e-08, "loss": 0.8347, "step": 8542 }, { "epoch": 2.9117246080436265, "grad_norm": 9.67037147784539, "learning_rate": 2.656082381740921e-08, "loss": 1.1755, "step": 8543 }, { "epoch": 2.9120654396728014, "grad_norm": 14.666335952648044, "learning_rate": 2.6357082048395488e-08, "loss": 1.6043, "step": 8544 }, { "epoch": 2.9124062713019767, "grad_norm": 11.731992025215652, "learning_rate": 2.6154122653211377e-08, "loss": 0.8668, "step": 8545 }, { "epoch": 2.912747102931152, "grad_norm": 10.727060272273487, "learning_rate": 2.5951945663783562e-08, "loss": 1.2057, "step": 8546 }, { "epoch": 2.9130879345603273, "grad_norm": 13.83171317363536, "learning_rate": 2.5750551111915488e-08, "loss": 1.4279, "step": 8547 }, { "epoch": 2.9134287661895026, "grad_norm": 10.783987063979742, "learning_rate": 2.554993902928682e-08, "loss": 1.1919, "step": 8548 }, { "epoch": 2.9137695978186775, "grad_norm": 19.065677396983393, "learning_rate": 2.5350109447455084e-08, "loss": 1.5895, "step": 8549 }, { "epoch": 2.914110429447853, "grad_norm": 18.180484675140992, "learning_rate": 2.515106239785403e-08, "loss": 1.7875, "step": 8550 }, { "epoch": 2.914451261077028, "grad_norm": 16.132342631289813, "learning_rate": 2.4952797911794168e-08, "loss": 1.2325, "step": 8551 }, { "epoch": 2.914792092706203, "grad_norm": 17.67119757629895, "learning_rate": 2.4755316020463883e-08, "loss": 1.435, "step": 8552 }, { "epoch": 2.9151329243353783, "grad_norm": 14.205208623847932, "learning_rate": 2.455861675492832e-08, "loss": 1.8742, "step": 8553 }, { "epoch": 2.9154737559645536, "grad_norm": 16.40950145246869, "learning_rate": 2.4362700146127737e-08, "loss": 1.0581, "step": 8554 }, { "epoch": 2.9158145875937285, "grad_norm": 18.4654699838022, "learning_rate": 2.4167566224881367e-08, "loss": 1.3191, "step": 8555 }, { "epoch": 2.9161554192229038, "grad_norm": 10.775491348491641, "learning_rate": 2.3973215021884656e-08, "loss": 1.391, "step": 8556 }, { "epoch": 2.916496250852079, "grad_norm": 26.221405131584312, "learning_rate": 2.377964656770926e-08, "loss": 1.9159, "step": 8557 }, { "epoch": 2.9168370824812544, "grad_norm": 11.585239817636948, "learning_rate": 2.3586860892805264e-08, "loss": 1.6216, "step": 8558 }, { "epoch": 2.9171779141104297, "grad_norm": 18.661426894510413, "learning_rate": 2.339485802749786e-08, "loss": 1.3217, "step": 8559 }, { "epoch": 2.9175187457396046, "grad_norm": 13.87107507695412, "learning_rate": 2.320363800198955e-08, "loss": 1.6715, "step": 8560 }, { "epoch": 2.91785957736878, "grad_norm": 14.380670539582674, "learning_rate": 2.3013200846360717e-08, "loss": 1.5202, "step": 8561 }, { "epoch": 2.918200408997955, "grad_norm": 15.197060604546717, "learning_rate": 2.2823546590567958e-08, "loss": 0.9059, "step": 8562 }, { "epoch": 2.91854124062713, "grad_norm": 14.195243838343858, "learning_rate": 2.2634675264444073e-08, "loss": 1.6501, "step": 8563 }, { "epoch": 2.9188820722563054, "grad_norm": 14.793538743063742, "learning_rate": 2.2446586897699185e-08, "loss": 1.486, "step": 8564 }, { "epoch": 2.9192229038854807, "grad_norm": 13.152414093466783, "learning_rate": 2.2259281519921295e-08, "loss": 1.2486, "step": 8565 }, { "epoch": 2.9195637355146555, "grad_norm": 14.0001850368025, "learning_rate": 2.2072759160572944e-08, "loss": 0.8972, "step": 8566 }, { "epoch": 2.919904567143831, "grad_norm": 18.147437335371677, "learning_rate": 2.1887019848996216e-08, "loss": 1.015, "step": 8567 }, { "epoch": 2.920245398773006, "grad_norm": 33.252835309421855, "learning_rate": 2.170206361440774e-08, "loss": 1.9784, "step": 8568 }, { "epoch": 2.9205862304021815, "grad_norm": 19.543140273995856, "learning_rate": 2.1517890485902028e-08, "loss": 1.199, "step": 8569 }, { "epoch": 2.9209270620313568, "grad_norm": 16.042624675030755, "learning_rate": 2.133450049245034e-08, "loss": 1.7063, "step": 8570 }, { "epoch": 2.9212678936605316, "grad_norm": 14.679622655965144, "learning_rate": 2.1151893662900712e-08, "loss": 1.6599, "step": 8571 }, { "epoch": 2.921608725289707, "grad_norm": 13.843459398872325, "learning_rate": 2.0970070025977952e-08, "loss": 1.2798, "step": 8572 }, { "epoch": 2.9219495569188823, "grad_norm": 14.241016694438674, "learning_rate": 2.0789029610283064e-08, "loss": 1.3597, "step": 8573 }, { "epoch": 2.922290388548057, "grad_norm": 14.234500712330973, "learning_rate": 2.060877244429549e-08, "loss": 1.7335, "step": 8574 }, { "epoch": 2.9226312201772324, "grad_norm": 17.26196366258333, "learning_rate": 2.042929855636977e-08, "loss": 1.6363, "step": 8575 }, { "epoch": 2.9229720518064077, "grad_norm": 14.525610302152266, "learning_rate": 2.0250607974737214e-08, "loss": 1.487, "step": 8576 }, { "epoch": 2.9233128834355826, "grad_norm": 23.10031589203348, "learning_rate": 2.007270072750811e-08, "loss": 1.0803, "step": 8577 }, { "epoch": 2.923653715064758, "grad_norm": 17.04693954521189, "learning_rate": 1.9895576842666743e-08, "loss": 1.9102, "step": 8578 }, { "epoch": 2.923994546693933, "grad_norm": 16.856590509496062, "learning_rate": 1.971923634807582e-08, "loss": 1.8996, "step": 8579 }, { "epoch": 2.9243353783231085, "grad_norm": 15.507179967053363, "learning_rate": 1.9543679271474824e-08, "loss": 1.7114, "step": 8580 }, { "epoch": 2.924676209952284, "grad_norm": 14.181383205545426, "learning_rate": 1.936890564047833e-08, "loss": 1.368, "step": 8581 }, { "epoch": 2.9250170415814587, "grad_norm": 10.800216804612969, "learning_rate": 1.9194915482580455e-08, "loss": 0.9333, "step": 8582 }, { "epoch": 2.925357873210634, "grad_norm": 14.914334616181398, "learning_rate": 1.902170882514931e-08, "loss": 1.9051, "step": 8583 }, { "epoch": 2.9256987048398093, "grad_norm": 19.58111025412017, "learning_rate": 1.884928569543143e-08, "loss": 1.158, "step": 8584 }, { "epoch": 2.926039536468984, "grad_norm": 16.26026063371142, "learning_rate": 1.867764612055012e-08, "loss": 1.273, "step": 8585 }, { "epoch": 2.9263803680981595, "grad_norm": 15.978463034887712, "learning_rate": 1.8506790127504337e-08, "loss": 1.7219, "step": 8586 }, { "epoch": 2.926721199727335, "grad_norm": 11.263761488376462, "learning_rate": 1.833671774317092e-08, "loss": 1.1236, "step": 8587 }, { "epoch": 2.9270620313565097, "grad_norm": 15.816236566880669, "learning_rate": 1.816742899430235e-08, "loss": 1.6076, "step": 8588 }, { "epoch": 2.927402862985685, "grad_norm": 17.440587121405194, "learning_rate": 1.7998923907528444e-08, "loss": 1.9577, "step": 8589 }, { "epoch": 2.9277436946148603, "grad_norm": 14.845926241210684, "learning_rate": 1.783120250935688e-08, "loss": 1.6416, "step": 8590 }, { "epoch": 2.9280845262440356, "grad_norm": 13.545197967426947, "learning_rate": 1.766426482616934e-08, "loss": 1.8114, "step": 8591 }, { "epoch": 2.928425357873211, "grad_norm": 12.987617746018705, "learning_rate": 1.749811088422704e-08, "loss": 1.6007, "step": 8592 }, { "epoch": 2.9287661895023858, "grad_norm": 15.159695334511643, "learning_rate": 1.7332740709665174e-08, "loss": 0.9089, "step": 8593 }, { "epoch": 2.929107021131561, "grad_norm": 17.83428146721345, "learning_rate": 1.7168154328499055e-08, "loss": 1.013, "step": 8594 }, { "epoch": 2.9294478527607364, "grad_norm": 15.52656885238594, "learning_rate": 1.7004351766616854e-08, "loss": 1.965, "step": 8595 }, { "epoch": 2.9297886843899112, "grad_norm": 13.500446902156487, "learning_rate": 1.6841333049786855e-08, "loss": 1.3432, "step": 8596 }, { "epoch": 2.9301295160190866, "grad_norm": 16.361778906533495, "learning_rate": 1.6679098203651878e-08, "loss": 0.8819, "step": 8597 }, { "epoch": 2.930470347648262, "grad_norm": 19.331850925432136, "learning_rate": 1.651764725373206e-08, "loss": 1.1618, "step": 8598 }, { "epoch": 2.9308111792774367, "grad_norm": 30.148657224108412, "learning_rate": 1.6356980225424868e-08, "loss": 1.5087, "step": 8599 }, { "epoch": 2.931152010906612, "grad_norm": 20.255590750428993, "learning_rate": 1.619709714400286e-08, "loss": 1.827, "step": 8600 }, { "epoch": 2.9314928425357873, "grad_norm": 16.221503034435525, "learning_rate": 1.603799803461703e-08, "loss": 1.5123, "step": 8601 }, { "epoch": 2.9318336741649627, "grad_norm": 11.535754447865019, "learning_rate": 1.5879682922294027e-08, "loss": 1.0872, "step": 8602 }, { "epoch": 2.932174505794138, "grad_norm": 13.97257949982635, "learning_rate": 1.5722151831937816e-08, "loss": 1.3945, "step": 8603 }, { "epoch": 2.932515337423313, "grad_norm": 24.26248444148853, "learning_rate": 1.556540478832802e-08, "loss": 1.6107, "step": 8604 }, { "epoch": 2.932856169052488, "grad_norm": 16.39051192630273, "learning_rate": 1.5409441816122138e-08, "loss": 0.8853, "step": 8605 }, { "epoch": 2.9331970006816634, "grad_norm": 13.760881221303123, "learning_rate": 1.525426293985388e-08, "loss": 1.5612, "step": 8606 }, { "epoch": 2.9335378323108383, "grad_norm": 16.743581868499096, "learning_rate": 1.509986818393261e-08, "loss": 1.425, "step": 8607 }, { "epoch": 2.9338786639400136, "grad_norm": 18.911542388345026, "learning_rate": 1.4946257572646116e-08, "loss": 1.4418, "step": 8608 }, { "epoch": 2.934219495569189, "grad_norm": 29.472454774355914, "learning_rate": 1.4793431130157854e-08, "loss": 1.8294, "step": 8609 }, { "epoch": 2.934560327198364, "grad_norm": 19.1493044228161, "learning_rate": 1.4641388880507479e-08, "loss": 1.1564, "step": 8610 }, { "epoch": 2.934901158827539, "grad_norm": 17.20699852448855, "learning_rate": 1.4490130847612527e-08, "loss": 1.1155, "step": 8611 }, { "epoch": 2.9352419904567144, "grad_norm": 16.67961299379523, "learning_rate": 1.4339657055265632e-08, "loss": 1.4131, "step": 8612 }, { "epoch": 2.9355828220858897, "grad_norm": 20.093846906275523, "learning_rate": 1.418996752713786e-08, "loss": 1.2476, "step": 8613 }, { "epoch": 2.935923653715065, "grad_norm": 12.418595472841353, "learning_rate": 1.4041062286775375e-08, "loss": 1.6949, "step": 8614 }, { "epoch": 2.93626448534424, "grad_norm": 17.368803637122426, "learning_rate": 1.389294135760222e-08, "loss": 1.7027, "step": 8615 }, { "epoch": 2.936605316973415, "grad_norm": 12.371924537407596, "learning_rate": 1.374560476291753e-08, "loss": 1.3868, "step": 8616 }, { "epoch": 2.9369461486025905, "grad_norm": 13.823815592355194, "learning_rate": 1.3599052525898327e-08, "loss": 1.2486, "step": 8617 }, { "epoch": 2.9372869802317654, "grad_norm": 12.836353327258843, "learning_rate": 1.3453284669597833e-08, "loss": 1.4046, "step": 8618 }, { "epoch": 2.9376278118609407, "grad_norm": 10.778397294689825, "learning_rate": 1.330830121694604e-08, "loss": 0.8515, "step": 8619 }, { "epoch": 2.937968643490116, "grad_norm": 16.54153290488683, "learning_rate": 1.316410219074915e-08, "loss": 1.4021, "step": 8620 }, { "epoch": 2.938309475119291, "grad_norm": 12.201115779137458, "learning_rate": 1.3020687613690685e-08, "loss": 1.3311, "step": 8621 }, { "epoch": 2.938650306748466, "grad_norm": 14.885679169789656, "learning_rate": 1.2878057508329823e-08, "loss": 1.2737, "step": 8622 }, { "epoch": 2.9389911383776415, "grad_norm": 11.894284993349144, "learning_rate": 1.2736211897103613e-08, "loss": 1.0235, "step": 8623 }, { "epoch": 2.939331970006817, "grad_norm": 12.908963708539318, "learning_rate": 1.2595150802324207e-08, "loss": 1.3134, "step": 8624 }, { "epoch": 2.939672801635992, "grad_norm": 12.854405694826342, "learning_rate": 1.2454874246181081e-08, "loss": 0.9604, "step": 8625 }, { "epoch": 2.940013633265167, "grad_norm": 9.001557313884804, "learning_rate": 1.231538225074047e-08, "loss": 1.0381, "step": 8626 }, { "epoch": 2.9403544648943423, "grad_norm": 21.796673732147852, "learning_rate": 1.217667483794538e-08, "loss": 1.123, "step": 8627 }, { "epoch": 2.9406952965235176, "grad_norm": 13.917167948494303, "learning_rate": 1.2038752029614464e-08, "loss": 1.421, "step": 8628 }, { "epoch": 2.9410361281526924, "grad_norm": 15.995164295997728, "learning_rate": 1.1901613847443706e-08, "loss": 1.4732, "step": 8629 }, { "epoch": 2.9413769597818678, "grad_norm": 20.752417998906317, "learning_rate": 1.1765260313005844e-08, "loss": 1.1574, "step": 8630 }, { "epoch": 2.941717791411043, "grad_norm": 14.922126841516329, "learning_rate": 1.162969144774928e-08, "loss": 1.7117, "step": 8631 }, { "epoch": 2.942058623040218, "grad_norm": 18.92300045395376, "learning_rate": 1.1494907272999733e-08, "loss": 0.8706, "step": 8632 }, { "epoch": 2.9423994546693932, "grad_norm": 13.320803452364126, "learning_rate": 1.1360907809959687e-08, "loss": 1.546, "step": 8633 }, { "epoch": 2.9427402862985685, "grad_norm": 23.503239157882177, "learning_rate": 1.122769307970728e-08, "loss": 0.9436, "step": 8634 }, { "epoch": 2.943081117927744, "grad_norm": 11.894010569996643, "learning_rate": 1.1095263103197973e-08, "loss": 1.1001, "step": 8635 }, { "epoch": 2.943421949556919, "grad_norm": 11.660001747452846, "learning_rate": 1.0963617901263434e-08, "loss": 0.9267, "step": 8636 }, { "epoch": 2.943762781186094, "grad_norm": 15.777766658537628, "learning_rate": 1.08327574946121e-08, "loss": 1.6118, "step": 8637 }, { "epoch": 2.9441036128152693, "grad_norm": 12.660326600414482, "learning_rate": 1.0702681903828615e-08, "loss": 1.8174, "step": 8638 }, { "epoch": 2.9444444444444446, "grad_norm": 12.994033857651692, "learning_rate": 1.057339114937439e-08, "loss": 1.2878, "step": 8639 }, { "epoch": 2.9447852760736195, "grad_norm": 15.739674970759928, "learning_rate": 1.04448852515876e-08, "loss": 1.1095, "step": 8640 }, { "epoch": 2.945126107702795, "grad_norm": 11.434976591648086, "learning_rate": 1.031716423068263e-08, "loss": 1.446, "step": 8641 }, { "epoch": 2.94546693933197, "grad_norm": 13.917060989151137, "learning_rate": 1.0190228106750633e-08, "loss": 1.2087, "step": 8642 }, { "epoch": 2.945807770961145, "grad_norm": 18.638080835625615, "learning_rate": 1.0064076899758968e-08, "loss": 1.2174, "step": 8643 }, { "epoch": 2.9461486025903203, "grad_norm": 10.402169140323856, "learning_rate": 9.938710629551763e-09, "loss": 1.4282, "step": 8644 }, { "epoch": 2.9464894342194956, "grad_norm": 11.47390879455159, "learning_rate": 9.814129315849908e-09, "loss": 1.4103, "step": 8645 }, { "epoch": 2.946830265848671, "grad_norm": 15.685189311277485, "learning_rate": 9.69033297824995e-09, "loss": 1.6045, "step": 8646 }, { "epoch": 2.947171097477846, "grad_norm": 16.046300767971534, "learning_rate": 9.567321636225757e-09, "loss": 0.7149, "step": 8647 }, { "epoch": 2.947511929107021, "grad_norm": 14.796919036008703, "learning_rate": 9.445095309127961e-09, "loss": 1.5703, "step": 8648 }, { "epoch": 2.9478527607361964, "grad_norm": 15.054574786216534, "learning_rate": 9.323654016182848e-09, "loss": 1.8019, "step": 8649 }, { "epoch": 2.9481935923653717, "grad_norm": 16.23664189110524, "learning_rate": 9.202997776493472e-09, "loss": 1.409, "step": 8650 }, { "epoch": 2.9485344239945466, "grad_norm": 10.574074741040537, "learning_rate": 9.083126609040206e-09, "loss": 0.9976, "step": 8651 }, { "epoch": 2.948875255623722, "grad_norm": 16.139278570743258, "learning_rate": 8.96404053267852e-09, "loss": 1.6064, "step": 8652 }, { "epoch": 2.949216087252897, "grad_norm": 12.792669250067183, "learning_rate": 8.84573956614121e-09, "loss": 1.3869, "step": 8653 }, { "epoch": 2.949556918882072, "grad_norm": 12.245682733401445, "learning_rate": 8.728223728038388e-09, "loss": 1.0543, "step": 8654 }, { "epoch": 2.9498977505112474, "grad_norm": 14.510615822644022, "learning_rate": 8.611493036854157e-09, "loss": 1.8609, "step": 8655 }, { "epoch": 2.9502385821404227, "grad_norm": 14.36889300575775, "learning_rate": 8.495547510952163e-09, "loss": 1.5218, "step": 8656 }, { "epoch": 2.950579413769598, "grad_norm": 18.139075817700494, "learning_rate": 8.380387168570592e-09, "loss": 1.2239, "step": 8657 }, { "epoch": 2.950920245398773, "grad_norm": 13.114127164223571, "learning_rate": 8.266012027824954e-09, "loss": 1.4188, "step": 8658 }, { "epoch": 2.951261077027948, "grad_norm": 12.252060062976652, "learning_rate": 8.15242210670586e-09, "loss": 1.0566, "step": 8659 }, { "epoch": 2.9516019086571235, "grad_norm": 14.401278042428666, "learning_rate": 8.039617423082346e-09, "loss": 1.7395, "step": 8660 }, { "epoch": 2.9519427402862988, "grad_norm": 13.009425070688584, "learning_rate": 7.92759799469911e-09, "loss": 1.2238, "step": 8661 }, { "epoch": 2.9522835719154736, "grad_norm": 22.018122390743613, "learning_rate": 7.816363839177054e-09, "loss": 1.2757, "step": 8662 }, { "epoch": 2.952624403544649, "grad_norm": 12.953120185737616, "learning_rate": 7.705914974013296e-09, "loss": 1.5164, "step": 8663 }, { "epoch": 2.9529652351738243, "grad_norm": 42.039641954630525, "learning_rate": 7.596251416582822e-09, "loss": 1.2412, "step": 8664 }, { "epoch": 2.953306066802999, "grad_norm": 48.91256893337498, "learning_rate": 7.487373184135171e-09, "loss": 1.1311, "step": 8665 }, { "epoch": 2.9536468984321744, "grad_norm": 19.53535799287134, "learning_rate": 7.379280293797753e-09, "loss": 1.6843, "step": 8666 }, { "epoch": 2.9539877300613497, "grad_norm": 12.814526661312811, "learning_rate": 7.271972762574187e-09, "loss": 1.2731, "step": 8667 }, { "epoch": 2.954328561690525, "grad_norm": 19.17491488549641, "learning_rate": 7.165450607344304e-09, "loss": 0.9876, "step": 8668 }, { "epoch": 2.9546693933197, "grad_norm": 16.52254916454829, "learning_rate": 7.059713844864147e-09, "loss": 1.9505, "step": 8669 }, { "epoch": 2.955010224948875, "grad_norm": 14.508045637576299, "learning_rate": 6.954762491767075e-09, "loss": 1.4495, "step": 8670 }, { "epoch": 2.9553510565780505, "grad_norm": 13.134310111946034, "learning_rate": 6.850596564561551e-09, "loss": 1.6138, "step": 8671 }, { "epoch": 2.955691888207226, "grad_norm": 13.811437175344023, "learning_rate": 6.747216079633911e-09, "loss": 1.3598, "step": 8672 }, { "epoch": 2.9560327198364007, "grad_norm": 17.123777687389698, "learning_rate": 6.644621053246702e-09, "loss": 1.2517, "step": 8673 }, { "epoch": 2.956373551465576, "grad_norm": 18.81309701992254, "learning_rate": 6.542811501537571e-09, "loss": 1.068, "step": 8674 }, { "epoch": 2.9567143830947513, "grad_norm": 13.269690078259375, "learning_rate": 6.441787440521485e-09, "loss": 1.5072, "step": 8675 }, { "epoch": 2.957055214723926, "grad_norm": 22.789193904174475, "learning_rate": 6.341548886091286e-09, "loss": 1.3938, "step": 8676 }, { "epoch": 2.9573960463531015, "grad_norm": 13.71253497308214, "learning_rate": 6.242095854013808e-09, "loss": 1.2105, "step": 8677 }, { "epoch": 2.957736877982277, "grad_norm": 17.594498798793598, "learning_rate": 6.143428359933201e-09, "loss": 1.3587, "step": 8678 }, { "epoch": 2.9580777096114517, "grad_norm": 14.183136634183352, "learning_rate": 6.045546419371496e-09, "loss": 1.5091, "step": 8679 }, { "epoch": 2.958418541240627, "grad_norm": 19.9473363463764, "learning_rate": 5.9484500477247075e-09, "loss": 1.2338, "step": 8680 }, { "epoch": 2.9587593728698023, "grad_norm": 16.28830225110535, "learning_rate": 5.8521392602667314e-09, "loss": 1.2302, "step": 8681 }, { "epoch": 2.9591002044989776, "grad_norm": 10.535966445017367, "learning_rate": 5.756614072147671e-09, "loss": 1.2325, "step": 8682 }, { "epoch": 2.959441036128153, "grad_norm": 23.066292903098244, "learning_rate": 5.661874498394393e-09, "loss": 1.2344, "step": 8683 }, { "epoch": 2.9597818677573278, "grad_norm": 18.289654450956192, "learning_rate": 5.567920553909978e-09, "loss": 1.0006, "step": 8684 }, { "epoch": 2.960122699386503, "grad_norm": 19.29515633394969, "learning_rate": 5.474752253472604e-09, "loss": 1.247, "step": 8685 }, { "epoch": 2.9604635310156784, "grad_norm": 11.197093384867285, "learning_rate": 5.382369611739435e-09, "loss": 1.1371, "step": 8686 }, { "epoch": 2.9608043626448532, "grad_norm": 15.014400457347097, "learning_rate": 5.290772643241626e-09, "loss": 1.0969, "step": 8687 }, { "epoch": 2.9611451942740286, "grad_norm": 17.329205615622232, "learning_rate": 5.199961362387651e-09, "loss": 1.4793, "step": 8688 }, { "epoch": 2.961486025903204, "grad_norm": 17.36743352180151, "learning_rate": 5.109935783463305e-09, "loss": 1.2795, "step": 8689 }, { "epoch": 2.9618268575323787, "grad_norm": 17.78295320893461, "learning_rate": 5.0206959206300364e-09, "loss": 1.2827, "step": 8690 }, { "epoch": 2.962167689161554, "grad_norm": 13.384235586109837, "learning_rate": 4.9322417879243965e-09, "loss": 1.6259, "step": 8691 }, { "epoch": 2.9625085207907293, "grad_norm": 15.914181173616491, "learning_rate": 4.84457339926192e-09, "loss": 1.6242, "step": 8692 }, { "epoch": 2.9628493524199047, "grad_norm": 17.484786054924836, "learning_rate": 4.757690768432133e-09, "loss": 1.073, "step": 8693 }, { "epoch": 2.96319018404908, "grad_norm": 15.989498758071397, "learning_rate": 4.6715939091024344e-09, "loss": 1.3036, "step": 8694 }, { "epoch": 2.963531015678255, "grad_norm": 9.071651192899399, "learning_rate": 4.586282834816991e-09, "loss": 1.0327, "step": 8695 }, { "epoch": 2.96387184730743, "grad_norm": 22.70079678069397, "learning_rate": 4.501757558993958e-09, "loss": 1.4697, "step": 8696 }, { "epoch": 2.9642126789366054, "grad_norm": 16.772055267569474, "learning_rate": 4.418018094931032e-09, "loss": 1.1554, "step": 8697 }, { "epoch": 2.9645535105657803, "grad_norm": 15.876814461257688, "learning_rate": 4.335064455799898e-09, "loss": 1.2923, "step": 8698 }, { "epoch": 2.9648943421949556, "grad_norm": 14.24914999817876, "learning_rate": 4.2528966546495634e-09, "loss": 1.5015, "step": 8699 }, { "epoch": 2.965235173824131, "grad_norm": 15.691152807107361, "learning_rate": 4.171514704405799e-09, "loss": 1.916, "step": 8700 }, { "epoch": 2.965576005453306, "grad_norm": 18.468593879896456, "learning_rate": 4.0909186178700325e-09, "loss": 1.3798, "step": 8701 }, { "epoch": 2.965916837082481, "grad_norm": 13.018833352749837, "learning_rate": 4.011108407719899e-09, "loss": 1.4807, "step": 8702 }, { "epoch": 2.9662576687116564, "grad_norm": 10.167267202927293, "learning_rate": 3.932084086510912e-09, "loss": 1.089, "step": 8703 }, { "epoch": 2.9665985003408317, "grad_norm": 16.955101139888555, "learning_rate": 3.8538456666725736e-09, "loss": 1.1487, "step": 8704 }, { "epoch": 2.966939331970007, "grad_norm": 16.80441577963747, "learning_rate": 3.7763931605133694e-09, "loss": 1.8121, "step": 8705 }, { "epoch": 2.967280163599182, "grad_norm": 14.68001659111009, "learning_rate": 3.699726580216334e-09, "loss": 1.0802, "step": 8706 }, { "epoch": 2.967620995228357, "grad_norm": 9.98982755038774, "learning_rate": 3.623845937841264e-09, "loss": 1.2851, "step": 8707 }, { "epoch": 2.9679618268575325, "grad_norm": 28.4403213855046, "learning_rate": 3.548751245324722e-09, "loss": 1.3708, "step": 8708 }, { "epoch": 2.9683026584867074, "grad_norm": 12.522838137007149, "learning_rate": 3.4744425144789262e-09, "loss": 1.5277, "step": 8709 }, { "epoch": 2.9686434901158827, "grad_norm": 13.036411057043248, "learning_rate": 3.40091975699397e-09, "loss": 1.1401, "step": 8710 }, { "epoch": 2.968984321745058, "grad_norm": 14.002368013613793, "learning_rate": 3.3281829844344917e-09, "loss": 1.482, "step": 8711 }, { "epoch": 2.969325153374233, "grad_norm": 15.40560779480544, "learning_rate": 3.256232208241894e-09, "loss": 1.9392, "step": 8712 }, { "epoch": 2.969665985003408, "grad_norm": 11.402353154309171, "learning_rate": 3.1850674397354563e-09, "loss": 1.0036, "step": 8713 }, { "epoch": 2.9700068166325835, "grad_norm": 15.548131149146515, "learning_rate": 3.1146886901090024e-09, "loss": 1.5951, "step": 8714 }, { "epoch": 2.970347648261759, "grad_norm": 36.819427247516835, "learning_rate": 3.0450959704331207e-09, "loss": 1.6158, "step": 8715 }, { "epoch": 2.970688479890934, "grad_norm": 13.641395289755538, "learning_rate": 2.9762892916557205e-09, "loss": 1.7867, "step": 8716 }, { "epoch": 2.971029311520109, "grad_norm": 20.700161598521817, "learning_rate": 2.9082686645998113e-09, "loss": 2.0366, "step": 8717 }, { "epoch": 2.9713701431492843, "grad_norm": 45.706940006095024, "learning_rate": 2.8410340999657226e-09, "loss": 1.5135, "step": 8718 }, { "epoch": 2.9717109747784596, "grad_norm": 13.551935798355906, "learning_rate": 2.7745856083294386e-09, "loss": 1.5252, "step": 8719 }, { "epoch": 2.9720518064076344, "grad_norm": 21.699153490944383, "learning_rate": 2.7089232001431543e-09, "loss": 1.4525, "step": 8720 }, { "epoch": 2.9723926380368098, "grad_norm": 37.0888857504136, "learning_rate": 2.6440468857369396e-09, "loss": 1.7783, "step": 8721 }, { "epoch": 2.972733469665985, "grad_norm": 19.773664006700134, "learning_rate": 2.5799566753154094e-09, "loss": 1.2775, "step": 8722 }, { "epoch": 2.97307430129516, "grad_norm": 25.06010456748568, "learning_rate": 2.516652578959944e-09, "loss": 1.4918, "step": 8723 }, { "epoch": 2.9734151329243352, "grad_norm": 15.25037868842791, "learning_rate": 2.4541346066292437e-09, "loss": 1.7249, "step": 8724 }, { "epoch": 2.9737559645535105, "grad_norm": 17.26107512486261, "learning_rate": 2.3924027681571093e-09, "loss": 1.2717, "step": 8725 }, { "epoch": 2.974096796182686, "grad_norm": 14.993020431531003, "learning_rate": 2.3314570732541063e-09, "loss": 1.7361, "step": 8726 }, { "epoch": 2.974437627811861, "grad_norm": 15.010723043096037, "learning_rate": 2.2712975315075658e-09, "loss": 1.0909, "step": 8727 }, { "epoch": 2.974778459441036, "grad_norm": 16.247327811717707, "learning_rate": 2.2119241523810287e-09, "loss": 1.3751, "step": 8728 }, { "epoch": 2.9751192910702113, "grad_norm": 12.74991591906319, "learning_rate": 2.153336945213691e-09, "loss": 1.7517, "step": 8729 }, { "epoch": 2.9754601226993866, "grad_norm": 11.99642490312561, "learning_rate": 2.0955359192215143e-09, "loss": 1.0799, "step": 8730 }, { "epoch": 2.9758009543285615, "grad_norm": 21.781742986694297, "learning_rate": 2.0385210834972245e-09, "loss": 1.8693, "step": 8731 }, { "epoch": 2.976141785957737, "grad_norm": 16.701900721563774, "learning_rate": 1.982292447009204e-09, "loss": 1.605, "step": 8732 }, { "epoch": 2.976482617586912, "grad_norm": 11.385649081727555, "learning_rate": 1.9268500186025994e-09, "loss": 1.4428, "step": 8733 }, { "epoch": 2.976823449216087, "grad_norm": 12.537288480308787, "learning_rate": 1.8721938069987676e-09, "loss": 1.1865, "step": 8734 }, { "epoch": 2.9771642808452623, "grad_norm": 16.414246243353716, "learning_rate": 1.8183238207952759e-09, "loss": 1.6654, "step": 8735 }, { "epoch": 2.9775051124744376, "grad_norm": 13.247236961257771, "learning_rate": 1.765240068465901e-09, "loss": 1.2454, "step": 8736 }, { "epoch": 2.977845944103613, "grad_norm": 17.099696628245884, "learning_rate": 1.7129425583611857e-09, "loss": 1.158, "step": 8737 }, { "epoch": 2.9781867757327882, "grad_norm": 35.4342241643858, "learning_rate": 1.6614312987078828e-09, "loss": 1.3809, "step": 8738 }, { "epoch": 2.978527607361963, "grad_norm": 16.366758283759978, "learning_rate": 1.6107062976083998e-09, "loss": 1.5384, "step": 8739 }, { "epoch": 2.9788684389911384, "grad_norm": 31.616451754245432, "learning_rate": 1.5607675630419094e-09, "loss": 2.0056, "step": 8740 }, { "epoch": 2.9792092706203137, "grad_norm": 9.08069967640011, "learning_rate": 1.5116151028649052e-09, "loss": 1.3891, "step": 8741 }, { "epoch": 2.9795501022494886, "grad_norm": 7.8722954420131765, "learning_rate": 1.4632489248084247e-09, "loss": 1.0138, "step": 8742 }, { "epoch": 2.979890933878664, "grad_norm": 17.699351124924032, "learning_rate": 1.4156690364808267e-09, "loss": 1.5058, "step": 8743 }, { "epoch": 2.980231765507839, "grad_norm": 15.508553959367113, "learning_rate": 1.3688754453666797e-09, "loss": 1.0433, "step": 8744 }, { "epoch": 2.980572597137014, "grad_norm": 10.647124325623421, "learning_rate": 1.3228681588267623e-09, "loss": 1.0818, "step": 8745 }, { "epoch": 2.9809134287661894, "grad_norm": 18.815185437967003, "learning_rate": 1.2776471840986182e-09, "loss": 1.0788, "step": 8746 }, { "epoch": 2.9812542603953647, "grad_norm": 18.21682129905519, "learning_rate": 1.2332125282948915e-09, "loss": 1.0344, "step": 8747 }, { "epoch": 2.98159509202454, "grad_norm": 16.0548507210014, "learning_rate": 1.1895641984061012e-09, "loss": 1.347, "step": 8748 }, { "epoch": 2.9819359236537153, "grad_norm": 12.628833390077013, "learning_rate": 1.1467022012978667e-09, "loss": 1.6541, "step": 8749 }, { "epoch": 2.98227675528289, "grad_norm": 14.413029137103704, "learning_rate": 1.1046265437131276e-09, "loss": 1.3072, "step": 8750 }, { "epoch": 2.9826175869120655, "grad_norm": 14.882569236851317, "learning_rate": 1.063337232269368e-09, "loss": 1.0298, "step": 8751 }, { "epoch": 2.9829584185412408, "grad_norm": 15.878146160799863, "learning_rate": 1.022834273462503e-09, "loss": 1.4061, "step": 8752 }, { "epoch": 2.9832992501704156, "grad_norm": 17.480825167331275, "learning_rate": 9.831176736635473e-10, "loss": 2.0342, "step": 8753 }, { "epoch": 2.983640081799591, "grad_norm": 14.6003607295353, "learning_rate": 9.44187439120281e-10, "loss": 1.4236, "step": 8754 }, { "epoch": 2.9839809134287663, "grad_norm": 13.022309975826323, "learning_rate": 9.060435759566944e-10, "loss": 1.538, "step": 8755 }, { "epoch": 2.984321745057941, "grad_norm": 20.52988288870748, "learning_rate": 8.686860901718775e-10, "loss": 2.3399, "step": 8756 }, { "epoch": 2.9846625766871164, "grad_norm": 13.109524388123395, "learning_rate": 8.32114987643906e-10, "loss": 1.5327, "step": 8757 }, { "epoch": 2.9850034083162917, "grad_norm": 14.978285743690895, "learning_rate": 7.963302741242906e-10, "loss": 1.199, "step": 8758 }, { "epoch": 2.985344239945467, "grad_norm": 15.07117059362515, "learning_rate": 7.613319552429721e-10, "loss": 1.4839, "step": 8759 }, { "epoch": 2.9856850715746424, "grad_norm": 207.57297653500828, "learning_rate": 7.271200365049913e-10, "loss": 1.3884, "step": 8760 }, { "epoch": 2.9860259032038172, "grad_norm": 11.792015859776921, "learning_rate": 6.936945232915993e-10, "loss": 1.3367, "step": 8761 }, { "epoch": 2.9863667348329925, "grad_norm": 13.755168853019155, "learning_rate": 6.610554208613673e-10, "loss": 1.3477, "step": 8762 }, { "epoch": 2.986707566462168, "grad_norm": 13.034924800363322, "learning_rate": 6.292027343479668e-10, "loss": 1.5284, "step": 8763 }, { "epoch": 2.9870483980913427, "grad_norm": 19.346684337656228, "learning_rate": 5.981364687629443e-10, "loss": 1.8468, "step": 8764 }, { "epoch": 2.987389229720518, "grad_norm": 18.686691550434507, "learning_rate": 5.678566289918363e-10, "loss": 1.4882, "step": 8765 }, { "epoch": 2.9877300613496933, "grad_norm": 14.46547381950829, "learning_rate": 5.383632197991651e-10, "loss": 1.2136, "step": 8766 }, { "epoch": 2.988070892978868, "grad_norm": 20.568440299912247, "learning_rate": 5.096562458234422e-10, "loss": 1.2858, "step": 8767 }, { "epoch": 2.9884117246080435, "grad_norm": 22.78960819418655, "learning_rate": 4.817357115804999e-10, "loss": 1.6174, "step": 8768 }, { "epoch": 2.988752556237219, "grad_norm": 15.796673862941324, "learning_rate": 4.546016214623805e-10, "loss": 1.1207, "step": 8769 }, { "epoch": 2.989093387866394, "grad_norm": 21.356990194560566, "learning_rate": 4.282539797373364e-10, "loss": 1.1166, "step": 8770 }, { "epoch": 2.9894342194955694, "grad_norm": 13.33039359521358, "learning_rate": 4.0269279055038525e-10, "loss": 1.1683, "step": 8771 }, { "epoch": 2.9897750511247443, "grad_norm": 44.765580566339956, "learning_rate": 3.779180579221997e-10, "loss": 1.6973, "step": 8772 }, { "epoch": 2.9901158827539196, "grad_norm": 15.165777421899378, "learning_rate": 3.5392978574966263e-10, "loss": 1.3061, "step": 8773 }, { "epoch": 2.990456714383095, "grad_norm": 15.143257472111436, "learning_rate": 3.3072797780642207e-10, "loss": 1.0042, "step": 8774 }, { "epoch": 2.9907975460122698, "grad_norm": 13.643589023309422, "learning_rate": 3.083126377417811e-10, "loss": 0.9342, "step": 8775 }, { "epoch": 2.991138377641445, "grad_norm": 15.775919554617062, "learning_rate": 2.866837690829183e-10, "loss": 1.2619, "step": 8776 }, { "epoch": 2.9914792092706204, "grad_norm": 16.52639959580398, "learning_rate": 2.6584137523100183e-10, "loss": 1.0829, "step": 8777 }, { "epoch": 2.9918200408997953, "grad_norm": 15.932087749396446, "learning_rate": 2.4578545946507546e-10, "loss": 1.7002, "step": 8778 }, { "epoch": 2.9921608725289706, "grad_norm": 14.072840619944818, "learning_rate": 2.2651602493983792e-10, "loss": 1.1174, "step": 8779 }, { "epoch": 2.992501704158146, "grad_norm": 13.730307105529477, "learning_rate": 2.0803307468675317e-10, "loss": 1.0698, "step": 8780 }, { "epoch": 2.992842535787321, "grad_norm": 18.684302368506653, "learning_rate": 1.9033661161349525e-10, "loss": 1.5043, "step": 8781 }, { "epoch": 2.9931833674164965, "grad_norm": 15.322318548911477, "learning_rate": 1.7342663850283826e-10, "loss": 1.1541, "step": 8782 }, { "epoch": 2.9935241990456714, "grad_norm": 13.545635350404503, "learning_rate": 1.5730315801543161e-10, "loss": 1.4986, "step": 8783 }, { "epoch": 2.9938650306748467, "grad_norm": 11.664291990762784, "learning_rate": 1.4196617268757984e-10, "loss": 1.2646, "step": 8784 }, { "epoch": 2.994205862304022, "grad_norm": 12.164670326259502, "learning_rate": 1.2741568493124247e-10, "loss": 1.2813, "step": 8785 }, { "epoch": 2.994546693933197, "grad_norm": 9.442762887153371, "learning_rate": 1.1365169703625445e-10, "loss": 0.9142, "step": 8786 }, { "epoch": 2.994887525562372, "grad_norm": 12.710259895980363, "learning_rate": 1.0067421116755071e-10, "loss": 1.148, "step": 8787 }, { "epoch": 2.9952283571915475, "grad_norm": 12.352054924200603, "learning_rate": 8.848322936572118e-11, "loss": 1.1761, "step": 8788 }, { "epoch": 2.9955691888207223, "grad_norm": 24.70548176979912, "learning_rate": 7.707875354867612e-11, "loss": 1.2595, "step": 8789 }, { "epoch": 2.9959100204498976, "grad_norm": 13.900626520482637, "learning_rate": 6.646078551109103e-11, "loss": 1.3354, "step": 8790 }, { "epoch": 2.996250852079073, "grad_norm": 26.336106030519122, "learning_rate": 5.662932692329648e-11, "loss": 0.9432, "step": 8791 }, { "epoch": 2.9965916837082482, "grad_norm": 12.595606589872276, "learning_rate": 4.7584379330722906e-11, "loss": 1.5888, "step": 8792 }, { "epoch": 2.9969325153374236, "grad_norm": 15.768712374316397, "learning_rate": 3.9325944156676234e-11, "loss": 1.7218, "step": 8793 }, { "epoch": 2.9972733469665984, "grad_norm": 15.457723230951412, "learning_rate": 3.18540227006725e-11, "loss": 1.1009, "step": 8794 }, { "epoch": 2.9976141785957737, "grad_norm": 16.30660002086329, "learning_rate": 2.5168616137882796e-11, "loss": 1.7677, "step": 8795 }, { "epoch": 2.997955010224949, "grad_norm": 17.234665224552728, "learning_rate": 1.926972551968831e-11, "loss": 1.2316, "step": 8796 }, { "epoch": 2.998295841854124, "grad_norm": 15.877628755801904, "learning_rate": 1.4157351773680383e-11, "loss": 1.4115, "step": 8797 }, { "epoch": 2.998636673483299, "grad_norm": 17.182786530807526, "learning_rate": 9.83149570477071e-12, "loss": 1.6813, "step": 8798 }, { "epoch": 2.9989775051124745, "grad_norm": 17.82533384489834, "learning_rate": 6.292157993526005e-12, "loss": 1.7146, "step": 8799 }, { "epoch": 2.9993183367416494, "grad_norm": 18.57568378832831, "learning_rate": 3.539339196168001e-12, "loss": 2.1321, "step": 8800 }, { "epoch": 2.9996591683708247, "grad_norm": 16.289174318712707, "learning_rate": 1.5730397456836799e-12, "loss": 1.4318, "step": 8801 }, { "epoch": 3.0, "grad_norm": 22.72764068673402, "learning_rate": 3.9325995182526445e-13, "loss": 1.3212, "step": 8802 }, { "epoch": 3.0, "step": 8802, "total_flos": 7592842027008.0, "train_loss": 2.352717310000593, "train_runtime": 5422.7235, "train_samples_per_second": 12.981, "train_steps_per_second": 1.623 } ], "logging_steps": 1, "max_steps": 8802, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7592842027008.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }