{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 8966, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 2.3125102519989014, "eval_runtime": 636.0298, "eval_samples_per_second": 99.225, "eval_steps_per_second": 1.552, "step": 0 }, { "epoch": 0.00022306491188935982, "grad_norm": 447.7063293457031, "learning_rate": 0.0, "loss": 2.6996, "step": 1 }, { "epoch": 0.00044612982377871963, "grad_norm": 410.13433837890625, "learning_rate": 2.0000000000000002e-07, "loss": 2.5839, "step": 2 }, { "epoch": 0.0006691947356680794, "grad_norm": 514.7770385742188, "learning_rate": 4.0000000000000003e-07, "loss": 2.538, "step": 3 }, { "epoch": 0.0008922596475574393, "grad_norm": 73.62993621826172, "learning_rate": 6.000000000000001e-07, "loss": 2.5648, "step": 4 }, { "epoch": 0.001115324559446799, "grad_norm": 35.405982971191406, "learning_rate": 8.000000000000001e-07, "loss": 2.5318, "step": 5 }, { "epoch": 0.0013383894713361588, "grad_norm": 31.43743133544922, "learning_rate": 1.0000000000000002e-06, "loss": 2.4902, "step": 6 }, { "epoch": 0.0015614543832255187, "grad_norm": 32.07968521118164, "learning_rate": 1.2000000000000002e-06, "loss": 2.4386, "step": 7 }, { "epoch": 0.0017845192951148785, "grad_norm": 19.430936813354492, "learning_rate": 1.4000000000000001e-06, "loss": 2.0901, "step": 8 }, { "epoch": 0.002007584207004238, "grad_norm": 20.800960540771484, "learning_rate": 1.6000000000000001e-06, "loss": 2.1737, "step": 9 }, { "epoch": 0.002230649118893598, "grad_norm": 20.56642723083496, "learning_rate": 1.8000000000000001e-06, "loss": 2.1823, "step": 10 }, { "epoch": 0.002453714030782958, "grad_norm": 20.692852020263672, "learning_rate": 2.0000000000000003e-06, "loss": 2.1278, "step": 11 }, { "epoch": 0.0026767789426723177, "grad_norm": 14.481375694274902, "learning_rate": 2.2e-06, "loss": 1.9787, "step": 12 }, { "epoch": 0.0028998438545616775, "grad_norm": 13.366188049316406, "learning_rate": 2.4000000000000003e-06, "loss": 1.9635, "step": 13 }, { "epoch": 0.0031229087664510374, "grad_norm": 14.910027503967285, "learning_rate": 2.6e-06, "loss": 1.8838, "step": 14 }, { "epoch": 0.0033459736783403972, "grad_norm": 13.10051441192627, "learning_rate": 2.8000000000000003e-06, "loss": 1.8825, "step": 15 }, { "epoch": 0.003569038590229757, "grad_norm": 14.625020027160645, "learning_rate": 3e-06, "loss": 1.6607, "step": 16 }, { "epoch": 0.0037921035021191165, "grad_norm": 9.08655834197998, "learning_rate": 3.2000000000000003e-06, "loss": 1.5911, "step": 17 }, { "epoch": 0.004015168414008476, "grad_norm": 8.972038269042969, "learning_rate": 3.4000000000000005e-06, "loss": 1.5538, "step": 18 }, { "epoch": 0.004238233325897837, "grad_norm": 7.949400901794434, "learning_rate": 3.6000000000000003e-06, "loss": 1.5478, "step": 19 }, { "epoch": 0.004461298237787196, "grad_norm": 7.048157215118408, "learning_rate": 3.8000000000000005e-06, "loss": 1.3381, "step": 20 }, { "epoch": 0.004684363149676556, "grad_norm": 7.008754730224609, "learning_rate": 4.000000000000001e-06, "loss": 1.3275, "step": 21 }, { "epoch": 0.004907428061565916, "grad_norm": 6.20928955078125, "learning_rate": 4.2000000000000004e-06, "loss": 1.2469, "step": 22 }, { "epoch": 0.005130492973455275, "grad_norm": 4.8463029861450195, "learning_rate": 4.4e-06, "loss": 1.1167, "step": 23 }, { "epoch": 0.005353557885344635, "grad_norm": 20.94325065612793, "learning_rate": 4.600000000000001e-06, "loss": 1.1391, "step": 24 }, { "epoch": 0.005576622797233995, "grad_norm": 3.783233404159546, "learning_rate": 4.800000000000001e-06, "loss": 1.0692, "step": 25 }, { "epoch": 0.005799687709123355, "grad_norm": 3.780400276184082, "learning_rate": 5e-06, "loss": 1.0715, "step": 26 }, { "epoch": 0.0060227526210127145, "grad_norm": 3.059216260910034, "learning_rate": 5.2e-06, "loss": 1.0057, "step": 27 }, { "epoch": 0.006245817532902075, "grad_norm": 2.7420809268951416, "learning_rate": 5.400000000000001e-06, "loss": 0.9421, "step": 28 }, { "epoch": 0.006468882444791434, "grad_norm": 2.8878915309906006, "learning_rate": 5.600000000000001e-06, "loss": 0.9625, "step": 29 }, { "epoch": 0.0066919473566807944, "grad_norm": 2.9568190574645996, "learning_rate": 5.8e-06, "loss": 0.924, "step": 30 }, { "epoch": 0.006915012268570154, "grad_norm": 2.239628314971924, "learning_rate": 6e-06, "loss": 0.8886, "step": 31 }, { "epoch": 0.007138077180459514, "grad_norm": 2.2312073707580566, "learning_rate": 6.200000000000001e-06, "loss": 0.9105, "step": 32 }, { "epoch": 0.0073611420923488735, "grad_norm": 1.7348324060440063, "learning_rate": 6.4000000000000006e-06, "loss": 0.8685, "step": 33 }, { "epoch": 0.007584207004238233, "grad_norm": 1.3611754179000854, "learning_rate": 6.600000000000001e-06, "loss": 0.8088, "step": 34 }, { "epoch": 0.007807271916127593, "grad_norm": 1.2034807205200195, "learning_rate": 6.800000000000001e-06, "loss": 0.8101, "step": 35 }, { "epoch": 0.008030336828016953, "grad_norm": 1.3573353290557861, "learning_rate": 7e-06, "loss": 0.8048, "step": 36 }, { "epoch": 0.008253401739906312, "grad_norm": 1.4660226106643677, "learning_rate": 7.2000000000000005e-06, "loss": 0.7663, "step": 37 }, { "epoch": 0.008476466651795673, "grad_norm": 1.4917473793029785, "learning_rate": 7.4e-06, "loss": 0.7847, "step": 38 }, { "epoch": 0.008699531563685033, "grad_norm": 0.9017640352249146, "learning_rate": 7.600000000000001e-06, "loss": 0.7472, "step": 39 }, { "epoch": 0.008922596475574392, "grad_norm": 0.9448667764663696, "learning_rate": 7.800000000000002e-06, "loss": 0.7384, "step": 40 }, { "epoch": 0.009145661387463751, "grad_norm": 1.0112793445587158, "learning_rate": 8.000000000000001e-06, "loss": 0.735, "step": 41 }, { "epoch": 0.009368726299353113, "grad_norm": 0.8147673606872559, "learning_rate": 8.2e-06, "loss": 0.7463, "step": 42 }, { "epoch": 0.009591791211242472, "grad_norm": 0.8109986782073975, "learning_rate": 8.400000000000001e-06, "loss": 0.7275, "step": 43 }, { "epoch": 0.009814856123131831, "grad_norm": 0.6843619346618652, "learning_rate": 8.6e-06, "loss": 0.7413, "step": 44 }, { "epoch": 0.01003792103502119, "grad_norm": 0.5661330223083496, "learning_rate": 8.8e-06, "loss": 0.7226, "step": 45 }, { "epoch": 0.01026098594691055, "grad_norm": 0.8250797986984253, "learning_rate": 9e-06, "loss": 0.7099, "step": 46 }, { "epoch": 0.010484050858799911, "grad_norm": 0.5656530857086182, "learning_rate": 9.200000000000002e-06, "loss": 0.7145, "step": 47 }, { "epoch": 0.01070711577068927, "grad_norm": 0.7327658534049988, "learning_rate": 9.4e-06, "loss": 0.716, "step": 48 }, { "epoch": 0.01093018068257863, "grad_norm": 0.5881125330924988, "learning_rate": 9.600000000000001e-06, "loss": 0.6747, "step": 49 }, { "epoch": 0.01115324559446799, "grad_norm": 0.5523539185523987, "learning_rate": 9.800000000000001e-06, "loss": 0.6521, "step": 50 }, { "epoch": 0.01137631050635735, "grad_norm": 0.5446374416351318, "learning_rate": 1e-05, "loss": 0.6752, "step": 51 }, { "epoch": 0.01159937541824671, "grad_norm": 0.475178599357605, "learning_rate": 1.02e-05, "loss": 0.7108, "step": 52 }, { "epoch": 0.01182244033013607, "grad_norm": 0.7163774967193604, "learning_rate": 1.04e-05, "loss": 0.6717, "step": 53 }, { "epoch": 0.012045505242025429, "grad_norm": 0.4185207784175873, "learning_rate": 1.0600000000000002e-05, "loss": 0.6645, "step": 54 }, { "epoch": 0.012268570153914788, "grad_norm": 0.43035364151000977, "learning_rate": 1.0800000000000002e-05, "loss": 0.6814, "step": 55 }, { "epoch": 0.01249163506580415, "grad_norm": 0.37698522210121155, "learning_rate": 1.1000000000000001e-05, "loss": 0.7271, "step": 56 }, { "epoch": 0.012714699977693509, "grad_norm": 0.41325655579566956, "learning_rate": 1.1200000000000001e-05, "loss": 0.6718, "step": 57 }, { "epoch": 0.012937764889582868, "grad_norm": 0.4455505907535553, "learning_rate": 1.14e-05, "loss": 0.6461, "step": 58 }, { "epoch": 0.013160829801472228, "grad_norm": 0.39668968319892883, "learning_rate": 1.16e-05, "loss": 0.6744, "step": 59 }, { "epoch": 0.013383894713361589, "grad_norm": 0.43382027745246887, "learning_rate": 1.18e-05, "loss": 0.686, "step": 60 }, { "epoch": 0.013606959625250948, "grad_norm": 0.3178131878376007, "learning_rate": 1.2e-05, "loss": 0.6461, "step": 61 }, { "epoch": 0.013830024537140308, "grad_norm": 0.36169150471687317, "learning_rate": 1.22e-05, "loss": 0.6521, "step": 62 }, { "epoch": 0.014053089449029667, "grad_norm": 0.3932245671749115, "learning_rate": 1.2400000000000002e-05, "loss": 0.7118, "step": 63 }, { "epoch": 0.014276154360919028, "grad_norm": 0.3924449384212494, "learning_rate": 1.2600000000000001e-05, "loss": 0.6723, "step": 64 }, { "epoch": 0.014499219272808388, "grad_norm": 0.32354405522346497, "learning_rate": 1.2800000000000001e-05, "loss": 0.6589, "step": 65 }, { "epoch": 0.014722284184697747, "grad_norm": 0.3272797465324402, "learning_rate": 1.3000000000000001e-05, "loss": 0.6413, "step": 66 }, { "epoch": 0.014945349096587107, "grad_norm": 0.36967551708221436, "learning_rate": 1.3200000000000002e-05, "loss": 0.6432, "step": 67 }, { "epoch": 0.015168414008476466, "grad_norm": 0.2998528480529785, "learning_rate": 1.3400000000000002e-05, "loss": 0.6543, "step": 68 }, { "epoch": 0.015391478920365827, "grad_norm": 0.30827096104621887, "learning_rate": 1.3600000000000002e-05, "loss": 0.6715, "step": 69 }, { "epoch": 0.015614543832255186, "grad_norm": 0.3031168580055237, "learning_rate": 1.38e-05, "loss": 0.6504, "step": 70 }, { "epoch": 0.015837608744144548, "grad_norm": 0.3063329756259918, "learning_rate": 1.4e-05, "loss": 0.6214, "step": 71 }, { "epoch": 0.016060673656033905, "grad_norm": 0.2707573175430298, "learning_rate": 1.4200000000000001e-05, "loss": 0.6347, "step": 72 }, { "epoch": 0.016283738567923266, "grad_norm": 0.3051629066467285, "learning_rate": 1.4400000000000001e-05, "loss": 0.6318, "step": 73 }, { "epoch": 0.016506803479812624, "grad_norm": 0.3260219991207123, "learning_rate": 1.46e-05, "loss": 0.6219, "step": 74 }, { "epoch": 0.016729868391701985, "grad_norm": 0.2733941376209259, "learning_rate": 1.48e-05, "loss": 0.6384, "step": 75 }, { "epoch": 0.016952933303591346, "grad_norm": 0.35662564635276794, "learning_rate": 1.5000000000000002e-05, "loss": 0.622, "step": 76 }, { "epoch": 0.017175998215480704, "grad_norm": 0.31423047184944153, "learning_rate": 1.5200000000000002e-05, "loss": 0.6344, "step": 77 }, { "epoch": 0.017399063127370065, "grad_norm": 0.3120841681957245, "learning_rate": 1.54e-05, "loss": 0.6566, "step": 78 }, { "epoch": 0.017622128039259423, "grad_norm": 0.30338573455810547, "learning_rate": 1.5600000000000003e-05, "loss": 0.6537, "step": 79 }, { "epoch": 0.017845192951148784, "grad_norm": 0.3202762007713318, "learning_rate": 1.58e-05, "loss": 0.6705, "step": 80 }, { "epoch": 0.018068257863038145, "grad_norm": 0.3163682520389557, "learning_rate": 1.6000000000000003e-05, "loss": 0.6175, "step": 81 }, { "epoch": 0.018291322774927503, "grad_norm": 0.2831353545188904, "learning_rate": 1.62e-05, "loss": 0.6301, "step": 82 }, { "epoch": 0.018514387686816864, "grad_norm": 0.267448365688324, "learning_rate": 1.64e-05, "loss": 0.6224, "step": 83 }, { "epoch": 0.018737452598706225, "grad_norm": 0.2999647259712219, "learning_rate": 1.66e-05, "loss": 0.6294, "step": 84 }, { "epoch": 0.018960517510595583, "grad_norm": 0.27424749732017517, "learning_rate": 1.6800000000000002e-05, "loss": 0.5998, "step": 85 }, { "epoch": 0.019183582422484944, "grad_norm": 0.2582665979862213, "learning_rate": 1.7e-05, "loss": 0.6043, "step": 86 }, { "epoch": 0.0194066473343743, "grad_norm": 0.2448103129863739, "learning_rate": 1.72e-05, "loss": 0.631, "step": 87 }, { "epoch": 0.019629712246263663, "grad_norm": 0.25323718786239624, "learning_rate": 1.7400000000000003e-05, "loss": 0.6248, "step": 88 }, { "epoch": 0.019852777158153024, "grad_norm": 0.32291799783706665, "learning_rate": 1.76e-05, "loss": 0.5797, "step": 89 }, { "epoch": 0.02007584207004238, "grad_norm": 0.3439197242259979, "learning_rate": 1.7800000000000002e-05, "loss": 0.6141, "step": 90 }, { "epoch": 0.020298906981931743, "grad_norm": 0.30653074383735657, "learning_rate": 1.8e-05, "loss": 0.6226, "step": 91 }, { "epoch": 0.0205219718938211, "grad_norm": 0.5994735956192017, "learning_rate": 1.8200000000000002e-05, "loss": 0.614, "step": 92 }, { "epoch": 0.02074503680571046, "grad_norm": 0.30260932445526123, "learning_rate": 1.8400000000000003e-05, "loss": 0.6224, "step": 93 }, { "epoch": 0.020968101717599823, "grad_norm": 0.2914034426212311, "learning_rate": 1.86e-05, "loss": 0.6201, "step": 94 }, { "epoch": 0.02119116662948918, "grad_norm": 0.28529268503189087, "learning_rate": 1.88e-05, "loss": 0.6046, "step": 95 }, { "epoch": 0.02141423154137854, "grad_norm": 0.26552823185920715, "learning_rate": 1.9e-05, "loss": 0.6058, "step": 96 }, { "epoch": 0.021637296453267903, "grad_norm": 0.4168465733528137, "learning_rate": 1.9200000000000003e-05, "loss": 0.6289, "step": 97 }, { "epoch": 0.02186036136515726, "grad_norm": 0.40763556957244873, "learning_rate": 1.94e-05, "loss": 0.6113, "step": 98 }, { "epoch": 0.02208342627704662, "grad_norm": 0.29686489701271057, "learning_rate": 1.9600000000000002e-05, "loss": 0.6288, "step": 99 }, { "epoch": 0.02230649118893598, "grad_norm": 0.27713605761528015, "learning_rate": 1.98e-05, "loss": 0.6404, "step": 100 }, { "epoch": 0.02252955610082534, "grad_norm": 0.2593631148338318, "learning_rate": 2e-05, "loss": 0.5652, "step": 101 }, { "epoch": 0.0227526210127147, "grad_norm": 0.2575121819972992, "learning_rate": 1.999999972306855e-05, "loss": 0.6062, "step": 102 }, { "epoch": 0.02297568592460406, "grad_norm": 0.24929626286029816, "learning_rate": 1.999999889227421e-05, "loss": 0.6336, "step": 103 }, { "epoch": 0.02319875083649342, "grad_norm": 0.2693648040294647, "learning_rate": 1.9999997507617033e-05, "loss": 0.6611, "step": 104 }, { "epoch": 0.023421815748382778, "grad_norm": 0.5212802290916443, "learning_rate": 1.9999995569097088e-05, "loss": 0.6162, "step": 105 }, { "epoch": 0.02364488066027214, "grad_norm": 0.25279495120048523, "learning_rate": 1.999999307671449e-05, "loss": 0.6062, "step": 106 }, { "epoch": 0.0238679455721615, "grad_norm": 0.2529296278953552, "learning_rate": 1.999999003046937e-05, "loss": 0.6164, "step": 107 }, { "epoch": 0.024091010484050858, "grad_norm": 0.3054715096950531, "learning_rate": 1.9999986430361896e-05, "loss": 0.5984, "step": 108 }, { "epoch": 0.02431407539594022, "grad_norm": 0.26101136207580566, "learning_rate": 1.9999982276392274e-05, "loss": 0.5809, "step": 109 }, { "epoch": 0.024537140307829577, "grad_norm": 0.26034069061279297, "learning_rate": 1.9999977568560734e-05, "loss": 0.6026, "step": 110 }, { "epoch": 0.024760205219718938, "grad_norm": 0.2570051848888397, "learning_rate": 1.999997230686753e-05, "loss": 0.6299, "step": 111 }, { "epoch": 0.0249832701316083, "grad_norm": 0.2621450424194336, "learning_rate": 1.999996649131296e-05, "loss": 0.6141, "step": 112 }, { "epoch": 0.025206335043497657, "grad_norm": 0.2425510734319687, "learning_rate": 1.999996012189734e-05, "loss": 0.6184, "step": 113 }, { "epoch": 0.025429399955387018, "grad_norm": 0.28203898668289185, "learning_rate": 1.999995319862103e-05, "loss": 0.5922, "step": 114 }, { "epoch": 0.02565246486727638, "grad_norm": 0.2976488173007965, "learning_rate": 1.9999945721484407e-05, "loss": 0.586, "step": 115 }, { "epoch": 0.025875529779165737, "grad_norm": 0.27894967794418335, "learning_rate": 1.999993769048789e-05, "loss": 0.6099, "step": 116 }, { "epoch": 0.026098594691055098, "grad_norm": 0.2924571931362152, "learning_rate": 1.999992910563192e-05, "loss": 0.5646, "step": 117 }, { "epoch": 0.026321659602944456, "grad_norm": 0.2446456104516983, "learning_rate": 1.9999919966916976e-05, "loss": 0.5722, "step": 118 }, { "epoch": 0.026544724514833817, "grad_norm": 0.23295633494853973, "learning_rate": 1.9999910274343562e-05, "loss": 0.5806, "step": 119 }, { "epoch": 0.026767789426723178, "grad_norm": 0.24767954647541046, "learning_rate": 1.999990002791221e-05, "loss": 0.6003, "step": 120 }, { "epoch": 0.026990854338612535, "grad_norm": 0.25390875339508057, "learning_rate": 1.99998892276235e-05, "loss": 0.5798, "step": 121 }, { "epoch": 0.027213919250501897, "grad_norm": 0.22989511489868164, "learning_rate": 1.999987787347802e-05, "loss": 0.5873, "step": 122 }, { "epoch": 0.027436984162391254, "grad_norm": 0.4493498206138611, "learning_rate": 1.99998659654764e-05, "loss": 0.5994, "step": 123 }, { "epoch": 0.027660049074280615, "grad_norm": 0.2416534572839737, "learning_rate": 1.99998535036193e-05, "loss": 0.5787, "step": 124 }, { "epoch": 0.027883113986169977, "grad_norm": 0.23413562774658203, "learning_rate": 1.9999840487907414e-05, "loss": 0.608, "step": 125 }, { "epoch": 0.028106178898059334, "grad_norm": 0.28015846014022827, "learning_rate": 1.9999826918341462e-05, "loss": 0.6034, "step": 126 }, { "epoch": 0.028329243809948695, "grad_norm": 0.2494441717863083, "learning_rate": 1.999981279492219e-05, "loss": 0.6256, "step": 127 }, { "epoch": 0.028552308721838057, "grad_norm": 0.3094983398914337, "learning_rate": 1.9999798117650385e-05, "loss": 0.6351, "step": 128 }, { "epoch": 0.028775373633727414, "grad_norm": 0.24032524228096008, "learning_rate": 1.9999782886526863e-05, "loss": 0.5703, "step": 129 }, { "epoch": 0.028998438545616775, "grad_norm": 0.24950377643108368, "learning_rate": 1.9999767101552458e-05, "loss": 0.5952, "step": 130 }, { "epoch": 0.029221503457506133, "grad_norm": 0.24877335131168365, "learning_rate": 1.999975076272805e-05, "loss": 0.6143, "step": 131 }, { "epoch": 0.029444568369395494, "grad_norm": 0.2579665184020996, "learning_rate": 1.999973387005455e-05, "loss": 0.6063, "step": 132 }, { "epoch": 0.029667633281284855, "grad_norm": 0.2793569564819336, "learning_rate": 1.9999716423532884e-05, "loss": 0.613, "step": 133 }, { "epoch": 0.029890698193174213, "grad_norm": 0.23061081767082214, "learning_rate": 1.999969842316402e-05, "loss": 0.5918, "step": 134 }, { "epoch": 0.030113763105063574, "grad_norm": 0.2353406399488449, "learning_rate": 1.999967986894896e-05, "loss": 0.585, "step": 135 }, { "epoch": 0.030336828016952932, "grad_norm": 0.2254764884710312, "learning_rate": 1.9999660760888722e-05, "loss": 0.611, "step": 136 }, { "epoch": 0.030559892928842293, "grad_norm": 0.24640028178691864, "learning_rate": 1.9999641098984378e-05, "loss": 0.5911, "step": 137 }, { "epoch": 0.030782957840731654, "grad_norm": 0.22932305932044983, "learning_rate": 1.9999620883237004e-05, "loss": 0.5999, "step": 138 }, { "epoch": 0.031006022752621012, "grad_norm": 0.2379560023546219, "learning_rate": 1.999960011364773e-05, "loss": 0.5987, "step": 139 }, { "epoch": 0.031229087664510373, "grad_norm": 0.22063873708248138, "learning_rate": 1.99995787902177e-05, "loss": 0.5867, "step": 140 }, { "epoch": 0.03145215257639973, "grad_norm": 0.23651117086410522, "learning_rate": 1.9999556912948096e-05, "loss": 0.5809, "step": 141 }, { "epoch": 0.031675217488289095, "grad_norm": 0.2621421813964844, "learning_rate": 1.9999534481840134e-05, "loss": 0.5935, "step": 142 }, { "epoch": 0.03189828240017845, "grad_norm": 0.24506062269210815, "learning_rate": 1.9999511496895047e-05, "loss": 0.5931, "step": 143 }, { "epoch": 0.03212134731206781, "grad_norm": 0.2224111109972, "learning_rate": 1.999948795811412e-05, "loss": 0.6156, "step": 144 }, { "epoch": 0.03234441222395717, "grad_norm": 0.23910044133663177, "learning_rate": 1.9999463865498644e-05, "loss": 0.5839, "step": 145 }, { "epoch": 0.03256747713584653, "grad_norm": 0.2555723786354065, "learning_rate": 1.9999439219049964e-05, "loss": 0.6474, "step": 146 }, { "epoch": 0.03279054204773589, "grad_norm": 0.25059348344802856, "learning_rate": 1.9999414018769442e-05, "loss": 0.5846, "step": 147 }, { "epoch": 0.03301360695962525, "grad_norm": 0.22700609266757965, "learning_rate": 1.9999388264658467e-05, "loss": 0.5867, "step": 148 }, { "epoch": 0.03323667187151461, "grad_norm": 0.25858354568481445, "learning_rate": 1.9999361956718476e-05, "loss": 0.5984, "step": 149 }, { "epoch": 0.03345973678340397, "grad_norm": 0.2813456356525421, "learning_rate": 1.9999335094950922e-05, "loss": 0.5943, "step": 150 }, { "epoch": 0.03368280169529333, "grad_norm": 0.288655549287796, "learning_rate": 1.9999307679357293e-05, "loss": 0.5975, "step": 151 }, { "epoch": 0.03390586660718269, "grad_norm": 0.23721511662006378, "learning_rate": 1.9999279709939102e-05, "loss": 0.5637, "step": 152 }, { "epoch": 0.03412893151907205, "grad_norm": 0.23953364789485931, "learning_rate": 1.999925118669791e-05, "loss": 0.5693, "step": 153 }, { "epoch": 0.03435199643096141, "grad_norm": 0.23194071650505066, "learning_rate": 1.9999222109635283e-05, "loss": 0.5446, "step": 154 }, { "epoch": 0.03457506134285077, "grad_norm": 0.2160234898328781, "learning_rate": 1.999919247875284e-05, "loss": 0.5812, "step": 155 }, { "epoch": 0.03479812625474013, "grad_norm": 0.22934173047542572, "learning_rate": 1.999916229405222e-05, "loss": 0.6149, "step": 156 }, { "epoch": 0.03502119116662949, "grad_norm": 0.24892570078372955, "learning_rate": 1.999913155553509e-05, "loss": 0.6053, "step": 157 }, { "epoch": 0.035244256078518846, "grad_norm": 0.2235739678144455, "learning_rate": 1.9999100263203165e-05, "loss": 0.5799, "step": 158 }, { "epoch": 0.03546732099040821, "grad_norm": 0.21852736175060272, "learning_rate": 1.9999068417058168e-05, "loss": 0.5793, "step": 159 }, { "epoch": 0.03569038590229757, "grad_norm": 0.269131600856781, "learning_rate": 1.9999036017101864e-05, "loss": 0.5817, "step": 160 }, { "epoch": 0.035913450814186926, "grad_norm": 0.22516575455665588, "learning_rate": 1.999900306333605e-05, "loss": 0.6, "step": 161 }, { "epoch": 0.03613651572607629, "grad_norm": 0.24313850700855255, "learning_rate": 1.999896955576255e-05, "loss": 0.5738, "step": 162 }, { "epoch": 0.03635958063796565, "grad_norm": 0.26203158497810364, "learning_rate": 1.999893549438322e-05, "loss": 0.5687, "step": 163 }, { "epoch": 0.036582645549855006, "grad_norm": 0.23275348544120789, "learning_rate": 1.9998900879199948e-05, "loss": 0.5698, "step": 164 }, { "epoch": 0.03680571046174437, "grad_norm": 0.2560306489467621, "learning_rate": 1.9998865710214646e-05, "loss": 0.5877, "step": 165 }, { "epoch": 0.03702877537363373, "grad_norm": 0.2175760716199875, "learning_rate": 1.999882998742927e-05, "loss": 0.5797, "step": 166 }, { "epoch": 0.037251840285523086, "grad_norm": 0.22381433844566345, "learning_rate": 1.999879371084579e-05, "loss": 0.5642, "step": 167 }, { "epoch": 0.03747490519741245, "grad_norm": 0.2363629788160324, "learning_rate": 1.9998756880466224e-05, "loss": 0.5651, "step": 168 }, { "epoch": 0.03769797010930181, "grad_norm": 0.22718748450279236, "learning_rate": 1.9998719496292603e-05, "loss": 0.5867, "step": 169 }, { "epoch": 0.037921035021191166, "grad_norm": 0.23127657175064087, "learning_rate": 1.9998681558327005e-05, "loss": 0.5724, "step": 170 }, { "epoch": 0.03814409993308052, "grad_norm": 0.23175480961799622, "learning_rate": 1.9998643066571527e-05, "loss": 0.5919, "step": 171 }, { "epoch": 0.03836716484496989, "grad_norm": 0.22871741652488708, "learning_rate": 1.9998604021028304e-05, "loss": 0.5653, "step": 172 }, { "epoch": 0.038590229756859246, "grad_norm": 0.21524189412593842, "learning_rate": 1.999856442169949e-05, "loss": 0.6123, "step": 173 }, { "epoch": 0.0388132946687486, "grad_norm": 0.23523908853530884, "learning_rate": 1.999852426858729e-05, "loss": 0.5931, "step": 174 }, { "epoch": 0.03903635958063797, "grad_norm": 0.23540736734867096, "learning_rate": 1.9998483561693926e-05, "loss": 0.5614, "step": 175 }, { "epoch": 0.039259424492527326, "grad_norm": 0.22590956091880798, "learning_rate": 1.999844230102164e-05, "loss": 0.5917, "step": 176 }, { "epoch": 0.03948248940441668, "grad_norm": 0.22635167837142944, "learning_rate": 1.999840048657273e-05, "loss": 0.5886, "step": 177 }, { "epoch": 0.03970555431630605, "grad_norm": 0.24712443351745605, "learning_rate": 1.9998358118349513e-05, "loss": 0.5936, "step": 178 }, { "epoch": 0.039928619228195406, "grad_norm": 0.266133576631546, "learning_rate": 1.999831519635433e-05, "loss": 0.5662, "step": 179 }, { "epoch": 0.04015168414008476, "grad_norm": 0.2172316312789917, "learning_rate": 1.9998271720589558e-05, "loss": 0.5679, "step": 180 }, { "epoch": 0.04037474905197413, "grad_norm": 0.3194250464439392, "learning_rate": 1.999822769105761e-05, "loss": 0.6105, "step": 181 }, { "epoch": 0.040597813963863486, "grad_norm": 0.2604449391365051, "learning_rate": 1.9998183107760915e-05, "loss": 0.5942, "step": 182 }, { "epoch": 0.04082087887575284, "grad_norm": 0.26620209217071533, "learning_rate": 1.9998137970701955e-05, "loss": 0.5859, "step": 183 }, { "epoch": 0.0410439437876422, "grad_norm": 0.2345753312110901, "learning_rate": 1.9998092279883215e-05, "loss": 0.5939, "step": 184 }, { "epoch": 0.041267008699531565, "grad_norm": 0.22213764488697052, "learning_rate": 1.999804603530724e-05, "loss": 0.5815, "step": 185 }, { "epoch": 0.04149007361142092, "grad_norm": 0.23276378214359283, "learning_rate": 1.9997999236976587e-05, "loss": 0.5842, "step": 186 }, { "epoch": 0.04171313852331028, "grad_norm": 0.22480995953083038, "learning_rate": 1.9997951884893843e-05, "loss": 0.5897, "step": 187 }, { "epoch": 0.041936203435199645, "grad_norm": 0.22761783003807068, "learning_rate": 1.9997903979061635e-05, "loss": 0.5873, "step": 188 }, { "epoch": 0.042159268347089, "grad_norm": 0.27698761224746704, "learning_rate": 1.9997855519482614e-05, "loss": 0.5933, "step": 189 }, { "epoch": 0.04238233325897836, "grad_norm": 0.2002975046634674, "learning_rate": 1.9997806506159466e-05, "loss": 0.5698, "step": 190 }, { "epoch": 0.042605398170867725, "grad_norm": 0.233673095703125, "learning_rate": 1.9997756939094905e-05, "loss": 0.5765, "step": 191 }, { "epoch": 0.04282846308275708, "grad_norm": 0.26376616954803467, "learning_rate": 1.999770681829168e-05, "loss": 0.5741, "step": 192 }, { "epoch": 0.04305152799464644, "grad_norm": 0.22886492311954498, "learning_rate": 1.9997656143752556e-05, "loss": 0.5709, "step": 193 }, { "epoch": 0.043274592906535805, "grad_norm": 0.3004070520401001, "learning_rate": 1.9997604915480352e-05, "loss": 0.5514, "step": 194 }, { "epoch": 0.04349765781842516, "grad_norm": 0.22209997475147247, "learning_rate": 1.99975531334779e-05, "loss": 0.6095, "step": 195 }, { "epoch": 0.04372072273031452, "grad_norm": 0.20531293749809265, "learning_rate": 1.9997500797748067e-05, "loss": 0.5618, "step": 196 }, { "epoch": 0.04394378764220388, "grad_norm": 0.20507480204105377, "learning_rate": 1.9997447908293753e-05, "loss": 0.5465, "step": 197 }, { "epoch": 0.04416685255409324, "grad_norm": 0.22681620717048645, "learning_rate": 1.999739446511789e-05, "loss": 0.5546, "step": 198 }, { "epoch": 0.0443899174659826, "grad_norm": 0.21772046387195587, "learning_rate": 1.999734046822343e-05, "loss": 0.5934, "step": 199 }, { "epoch": 0.04461298237787196, "grad_norm": 0.22321215271949768, "learning_rate": 1.9997285917613375e-05, "loss": 0.579, "step": 200 }, { "epoch": 0.04483604728976132, "grad_norm": 0.21166378259658813, "learning_rate": 1.9997230813290737e-05, "loss": 0.5978, "step": 201 }, { "epoch": 0.04505911220165068, "grad_norm": 0.23722383379936218, "learning_rate": 1.999717515525857e-05, "loss": 0.5747, "step": 202 }, { "epoch": 0.04528217711354004, "grad_norm": 0.36909613013267517, "learning_rate": 1.9997118943519962e-05, "loss": 0.5417, "step": 203 }, { "epoch": 0.0455052420254294, "grad_norm": 0.23573465645313263, "learning_rate": 1.9997062178078023e-05, "loss": 0.5971, "step": 204 }, { "epoch": 0.04572830693731876, "grad_norm": 0.25214946269989014, "learning_rate": 1.9997004858935894e-05, "loss": 0.5852, "step": 205 }, { "epoch": 0.04595137184920812, "grad_norm": 0.20637395977973938, "learning_rate": 1.9996946986096754e-05, "loss": 0.5394, "step": 206 }, { "epoch": 0.046174436761097476, "grad_norm": 0.21749068796634674, "learning_rate": 1.9996888559563804e-05, "loss": 0.5662, "step": 207 }, { "epoch": 0.04639750167298684, "grad_norm": 0.2306276559829712, "learning_rate": 1.9996829579340284e-05, "loss": 0.6038, "step": 208 }, { "epoch": 0.0466205665848762, "grad_norm": 0.22346408665180206, "learning_rate": 1.999677004542946e-05, "loss": 0.6024, "step": 209 }, { "epoch": 0.046843631496765556, "grad_norm": 0.22103498876094818, "learning_rate": 1.9996709957834627e-05, "loss": 0.5805, "step": 210 }, { "epoch": 0.04706669640865492, "grad_norm": 0.2120116651058197, "learning_rate": 1.9996649316559118e-05, "loss": 0.5712, "step": 211 }, { "epoch": 0.04728976132054428, "grad_norm": 0.24006275832653046, "learning_rate": 1.9996588121606286e-05, "loss": 0.5487, "step": 212 }, { "epoch": 0.047512826232433636, "grad_norm": 0.2095099836587906, "learning_rate": 1.9996526372979522e-05, "loss": 0.555, "step": 213 }, { "epoch": 0.047735891144323, "grad_norm": 0.23283496499061584, "learning_rate": 1.999646407068225e-05, "loss": 0.6025, "step": 214 }, { "epoch": 0.04795895605621236, "grad_norm": 0.2015821784734726, "learning_rate": 1.9996401214717912e-05, "loss": 0.5725, "step": 215 }, { "epoch": 0.048182020968101716, "grad_norm": 0.2395429164171219, "learning_rate": 1.999633780509e-05, "loss": 0.5549, "step": 216 }, { "epoch": 0.04840508587999108, "grad_norm": 0.2157035768032074, "learning_rate": 1.9996273841802017e-05, "loss": 0.5644, "step": 217 }, { "epoch": 0.04862815079188044, "grad_norm": 0.2156548947095871, "learning_rate": 1.9996209324857516e-05, "loss": 0.5901, "step": 218 }, { "epoch": 0.048851215703769796, "grad_norm": 0.2213698923587799, "learning_rate": 1.999614425426006e-05, "loss": 0.5463, "step": 219 }, { "epoch": 0.049074280615659154, "grad_norm": 0.21965420246124268, "learning_rate": 1.9996078630013253e-05, "loss": 0.5741, "step": 220 }, { "epoch": 0.04929734552754852, "grad_norm": 0.213240385055542, "learning_rate": 1.999601245212074e-05, "loss": 0.5698, "step": 221 }, { "epoch": 0.049520410439437876, "grad_norm": 0.21078208088874817, "learning_rate": 1.9995945720586177e-05, "loss": 0.5923, "step": 222 }, { "epoch": 0.04974347535132723, "grad_norm": 0.21629688143730164, "learning_rate": 1.9995878435413264e-05, "loss": 0.5438, "step": 223 }, { "epoch": 0.0499665402632166, "grad_norm": 0.22872765362262726, "learning_rate": 1.9995810596605725e-05, "loss": 0.5543, "step": 224 }, { "epoch": 0.050189605175105956, "grad_norm": 0.2012244164943695, "learning_rate": 1.999574220416732e-05, "loss": 0.557, "step": 225 }, { "epoch": 0.05041267008699531, "grad_norm": 0.21912412345409393, "learning_rate": 1.9995673258101837e-05, "loss": 0.5816, "step": 226 }, { "epoch": 0.05063573499888468, "grad_norm": 0.22358618676662445, "learning_rate": 1.999560375841309e-05, "loss": 0.6222, "step": 227 }, { "epoch": 0.050858799910774036, "grad_norm": 0.22544872760772705, "learning_rate": 1.9995533705104936e-05, "loss": 0.5481, "step": 228 }, { "epoch": 0.05108186482266339, "grad_norm": 0.20495697855949402, "learning_rate": 1.999546309818125e-05, "loss": 0.5371, "step": 229 }, { "epoch": 0.05130492973455276, "grad_norm": 0.22267979383468628, "learning_rate": 1.9995391937645944e-05, "loss": 0.5916, "step": 230 }, { "epoch": 0.051527994646442116, "grad_norm": 0.200364887714386, "learning_rate": 1.9995320223502958e-05, "loss": 0.541, "step": 231 }, { "epoch": 0.05175105955833147, "grad_norm": 0.20090313255786896, "learning_rate": 1.9995247955756267e-05, "loss": 0.565, "step": 232 }, { "epoch": 0.05197412447022083, "grad_norm": 5.10443639755249, "learning_rate": 1.9995175134409868e-05, "loss": 0.6056, "step": 233 }, { "epoch": 0.052197189382110196, "grad_norm": 0.30669254064559937, "learning_rate": 1.99951017594678e-05, "loss": 0.589, "step": 234 }, { "epoch": 0.05242025429399955, "grad_norm": 0.21791155636310577, "learning_rate": 1.9995027830934125e-05, "loss": 0.5438, "step": 235 }, { "epoch": 0.05264331920588891, "grad_norm": 0.25966677069664, "learning_rate": 1.9994953348812937e-05, "loss": 0.582, "step": 236 }, { "epoch": 0.052866384117778276, "grad_norm": 0.24935361742973328, "learning_rate": 1.9994878313108362e-05, "loss": 0.5934, "step": 237 }, { "epoch": 0.05308944902966763, "grad_norm": 0.22276830673217773, "learning_rate": 1.9994802723824557e-05, "loss": 0.5845, "step": 238 }, { "epoch": 0.05331251394155699, "grad_norm": 0.2349478304386139, "learning_rate": 1.9994726580965704e-05, "loss": 0.5716, "step": 239 }, { "epoch": 0.053535578853446356, "grad_norm": 0.22332008183002472, "learning_rate": 1.9994649884536026e-05, "loss": 0.6009, "step": 240 }, { "epoch": 0.05375864376533571, "grad_norm": 0.19660501182079315, "learning_rate": 1.9994572634539767e-05, "loss": 0.5574, "step": 241 }, { "epoch": 0.05398170867722507, "grad_norm": 0.2085440754890442, "learning_rate": 1.999449483098121e-05, "loss": 0.5676, "step": 242 }, { "epoch": 0.054204773589114436, "grad_norm": 0.21077819168567657, "learning_rate": 1.999441647386466e-05, "loss": 0.5507, "step": 243 }, { "epoch": 0.05442783850100379, "grad_norm": 0.2927658259868622, "learning_rate": 1.9994337563194457e-05, "loss": 0.5834, "step": 244 }, { "epoch": 0.05465090341289315, "grad_norm": 0.22845080494880676, "learning_rate": 1.9994258098974974e-05, "loss": 0.5524, "step": 245 }, { "epoch": 0.05487396832478251, "grad_norm": 0.24501702189445496, "learning_rate": 1.999417808121061e-05, "loss": 0.5376, "step": 246 }, { "epoch": 0.05509703323667187, "grad_norm": 0.19732032716274261, "learning_rate": 1.99940975099058e-05, "loss": 0.5697, "step": 247 }, { "epoch": 0.05532009814856123, "grad_norm": 0.21217837929725647, "learning_rate": 1.9994016385065005e-05, "loss": 0.5941, "step": 248 }, { "epoch": 0.05554316306045059, "grad_norm": 0.25333619117736816, "learning_rate": 1.999393470669272e-05, "loss": 0.5482, "step": 249 }, { "epoch": 0.05576622797233995, "grad_norm": 0.21620452404022217, "learning_rate": 1.9993852474793457e-05, "loss": 0.5712, "step": 250 }, { "epoch": 0.05598929288422931, "grad_norm": 0.20523187518119812, "learning_rate": 1.9993769689371788e-05, "loss": 0.5676, "step": 251 }, { "epoch": 0.05621235779611867, "grad_norm": 0.20364375412464142, "learning_rate": 1.999368635043229e-05, "loss": 0.5764, "step": 252 }, { "epoch": 0.05643542270800803, "grad_norm": 0.2179926186800003, "learning_rate": 1.9993602457979574e-05, "loss": 0.5583, "step": 253 }, { "epoch": 0.05665848761989739, "grad_norm": 0.23844636976718903, "learning_rate": 1.9993518012018297e-05, "loss": 0.5768, "step": 254 }, { "epoch": 0.05688155253178675, "grad_norm": 0.21450263261795044, "learning_rate": 1.9993433012553128e-05, "loss": 0.5814, "step": 255 }, { "epoch": 0.05710461744367611, "grad_norm": 0.22201555967330933, "learning_rate": 1.9993347459588777e-05, "loss": 0.5565, "step": 256 }, { "epoch": 0.05732768235556547, "grad_norm": 0.2182476669549942, "learning_rate": 1.9993261353129988e-05, "loss": 0.5818, "step": 257 }, { "epoch": 0.05755074726745483, "grad_norm": 0.1971082240343094, "learning_rate": 1.9993174693181517e-05, "loss": 0.5686, "step": 258 }, { "epoch": 0.057773812179344186, "grad_norm": 0.20535312592983246, "learning_rate": 1.999308747974818e-05, "loss": 0.5687, "step": 259 }, { "epoch": 0.05799687709123355, "grad_norm": 0.218933567404747, "learning_rate": 1.9992999712834794e-05, "loss": 0.5769, "step": 260 }, { "epoch": 0.05821994200312291, "grad_norm": 0.2132030874490738, "learning_rate": 1.9992911392446227e-05, "loss": 0.596, "step": 261 }, { "epoch": 0.058443006915012266, "grad_norm": 0.19926026463508606, "learning_rate": 1.999282251858737e-05, "loss": 0.558, "step": 262 }, { "epoch": 0.05866607182690163, "grad_norm": 0.2089562714099884, "learning_rate": 1.9992733091263144e-05, "loss": 0.582, "step": 263 }, { "epoch": 0.05888913673879099, "grad_norm": 0.20722773671150208, "learning_rate": 1.9992643110478504e-05, "loss": 0.5758, "step": 264 }, { "epoch": 0.059112201650680346, "grad_norm": 0.21631532907485962, "learning_rate": 1.999255257623843e-05, "loss": 0.5725, "step": 265 }, { "epoch": 0.05933526656256971, "grad_norm": 0.21997545659542084, "learning_rate": 1.999246148854794e-05, "loss": 0.5915, "step": 266 }, { "epoch": 0.05955833147445907, "grad_norm": 0.4292175769805908, "learning_rate": 1.9992369847412076e-05, "loss": 0.5891, "step": 267 }, { "epoch": 0.059781396386348426, "grad_norm": 0.2055254876613617, "learning_rate": 1.9992277652835918e-05, "loss": 0.5219, "step": 268 }, { "epoch": 0.06000446129823779, "grad_norm": 0.20326492190361023, "learning_rate": 1.9992184904824566e-05, "loss": 0.5462, "step": 269 }, { "epoch": 0.06022752621012715, "grad_norm": 0.19965752959251404, "learning_rate": 1.9992091603383164e-05, "loss": 0.5688, "step": 270 }, { "epoch": 0.060450591122016506, "grad_norm": 0.19592702388763428, "learning_rate": 1.9991997748516872e-05, "loss": 0.5556, "step": 271 }, { "epoch": 0.060673656033905864, "grad_norm": 0.2036285698413849, "learning_rate": 1.9991903340230898e-05, "loss": 0.5452, "step": 272 }, { "epoch": 0.06089672094579523, "grad_norm": 0.21509170532226562, "learning_rate": 1.9991808378530465e-05, "loss": 0.5682, "step": 273 }, { "epoch": 0.061119785857684586, "grad_norm": 0.2015625238418579, "learning_rate": 1.9991712863420832e-05, "loss": 0.5846, "step": 274 }, { "epoch": 0.061342850769573944, "grad_norm": 0.21707095205783844, "learning_rate": 1.9991616794907286e-05, "loss": 0.5666, "step": 275 }, { "epoch": 0.06156591568146331, "grad_norm": 0.1988460123538971, "learning_rate": 1.9991520172995158e-05, "loss": 0.5875, "step": 276 }, { "epoch": 0.061788980593352666, "grad_norm": 0.19290049374103546, "learning_rate": 1.999142299768979e-05, "loss": 0.5497, "step": 277 }, { "epoch": 0.062012045505242024, "grad_norm": 0.2069414258003235, "learning_rate": 1.9991325268996567e-05, "loss": 0.5438, "step": 278 }, { "epoch": 0.06223511041713139, "grad_norm": 0.22346992790699005, "learning_rate": 1.9991226986920906e-05, "loss": 0.5668, "step": 279 }, { "epoch": 0.062458175329020746, "grad_norm": 0.21194760501384735, "learning_rate": 1.9991128151468247e-05, "loss": 0.5957, "step": 280 }, { "epoch": 0.06268124024091011, "grad_norm": 0.209646537899971, "learning_rate": 1.9991028762644063e-05, "loss": 0.5748, "step": 281 }, { "epoch": 0.06290430515279946, "grad_norm": 0.20566052198410034, "learning_rate": 1.9990928820453858e-05, "loss": 0.5506, "step": 282 }, { "epoch": 0.06312737006468883, "grad_norm": 0.21374207735061646, "learning_rate": 1.999082832490317e-05, "loss": 0.5697, "step": 283 }, { "epoch": 0.06335043497657819, "grad_norm": 0.21012777090072632, "learning_rate": 1.999072727599757e-05, "loss": 0.5112, "step": 284 }, { "epoch": 0.06357349988846754, "grad_norm": 0.19954928755760193, "learning_rate": 1.9990625673742644e-05, "loss": 0.579, "step": 285 }, { "epoch": 0.0637965648003569, "grad_norm": 0.20394836366176605, "learning_rate": 1.9990523518144027e-05, "loss": 0.5425, "step": 286 }, { "epoch": 0.06401962971224627, "grad_norm": 0.20405294001102448, "learning_rate": 1.9990420809207375e-05, "loss": 0.5538, "step": 287 }, { "epoch": 0.06424269462413562, "grad_norm": 0.24065051972866058, "learning_rate": 1.9990317546938373e-05, "loss": 0.5786, "step": 288 }, { "epoch": 0.06446575953602499, "grad_norm": 0.20158305764198303, "learning_rate": 1.9990213731342747e-05, "loss": 0.5475, "step": 289 }, { "epoch": 0.06468882444791434, "grad_norm": 0.29271385073661804, "learning_rate": 1.9990109362426243e-05, "loss": 0.5468, "step": 290 }, { "epoch": 0.0649118893598037, "grad_norm": 0.2596181333065033, "learning_rate": 1.999000444019464e-05, "loss": 0.5632, "step": 291 }, { "epoch": 0.06513495427169307, "grad_norm": 0.20416077971458435, "learning_rate": 1.9989898964653753e-05, "loss": 0.5755, "step": 292 }, { "epoch": 0.06535801918358242, "grad_norm": 0.24204829335212708, "learning_rate": 1.998979293580942e-05, "loss": 0.5679, "step": 293 }, { "epoch": 0.06558108409547178, "grad_norm": 0.20215743780136108, "learning_rate": 1.9989686353667522e-05, "loss": 0.5596, "step": 294 }, { "epoch": 0.06580414900736115, "grad_norm": 0.24796119332313538, "learning_rate": 1.998957921823395e-05, "loss": 0.546, "step": 295 }, { "epoch": 0.0660272139192505, "grad_norm": 0.22253303229808807, "learning_rate": 1.9989471529514647e-05, "loss": 0.5606, "step": 296 }, { "epoch": 0.06625027883113986, "grad_norm": 0.19948336482048035, "learning_rate": 1.9989363287515577e-05, "loss": 0.596, "step": 297 }, { "epoch": 0.06647334374302923, "grad_norm": 0.26588407158851624, "learning_rate": 1.9989254492242727e-05, "loss": 0.5347, "step": 298 }, { "epoch": 0.06669640865491858, "grad_norm": 0.2680055797100067, "learning_rate": 1.9989145143702132e-05, "loss": 0.588, "step": 299 }, { "epoch": 0.06691947356680794, "grad_norm": 0.2872098684310913, "learning_rate": 1.9989035241899844e-05, "loss": 0.5261, "step": 300 }, { "epoch": 0.0671425384786973, "grad_norm": 0.19267728924751282, "learning_rate": 1.998892478684195e-05, "loss": 0.5807, "step": 301 }, { "epoch": 0.06736560339058666, "grad_norm": 0.20785115659236908, "learning_rate": 1.9988813778534568e-05, "loss": 0.5972, "step": 302 }, { "epoch": 0.06758866830247602, "grad_norm": 0.1926824450492859, "learning_rate": 1.998870221698385e-05, "loss": 0.5495, "step": 303 }, { "epoch": 0.06781173321436539, "grad_norm": 0.21504642069339752, "learning_rate": 1.9988590102195968e-05, "loss": 0.5593, "step": 304 }, { "epoch": 0.06803479812625474, "grad_norm": 0.22837965190410614, "learning_rate": 1.9988477434177136e-05, "loss": 0.5749, "step": 305 }, { "epoch": 0.0682578630381441, "grad_norm": 0.27670159935951233, "learning_rate": 1.9988364212933595e-05, "loss": 0.5474, "step": 306 }, { "epoch": 0.06848092795003347, "grad_norm": 0.1969708949327469, "learning_rate": 1.9988250438471612e-05, "loss": 0.573, "step": 307 }, { "epoch": 0.06870399286192282, "grad_norm": 0.19591858983039856, "learning_rate": 1.9988136110797494e-05, "loss": 0.5677, "step": 308 }, { "epoch": 0.06892705777381218, "grad_norm": 0.1888752281665802, "learning_rate": 1.998802122991757e-05, "loss": 0.5485, "step": 309 }, { "epoch": 0.06915012268570155, "grad_norm": 0.20352420210838318, "learning_rate": 1.9987905795838204e-05, "loss": 0.5635, "step": 310 }, { "epoch": 0.0693731875975909, "grad_norm": 0.19486026465892792, "learning_rate": 1.9987789808565785e-05, "loss": 0.5624, "step": 311 }, { "epoch": 0.06959625250948026, "grad_norm": 0.20236323773860931, "learning_rate": 1.9987673268106742e-05, "loss": 0.5817, "step": 312 }, { "epoch": 0.06981931742136963, "grad_norm": 0.2420659214258194, "learning_rate": 1.998755617446753e-05, "loss": 0.5586, "step": 313 }, { "epoch": 0.07004238233325898, "grad_norm": 0.21799051761627197, "learning_rate": 1.9987438527654633e-05, "loss": 0.5918, "step": 314 }, { "epoch": 0.07026544724514834, "grad_norm": 0.19442883133888245, "learning_rate": 1.9987320327674566e-05, "loss": 0.5621, "step": 315 }, { "epoch": 0.07048851215703769, "grad_norm": 0.19582122564315796, "learning_rate": 1.9987201574533876e-05, "loss": 0.5432, "step": 316 }, { "epoch": 0.07071157706892706, "grad_norm": 0.1984792798757553, "learning_rate": 1.998708226823914e-05, "loss": 0.5532, "step": 317 }, { "epoch": 0.07093464198081642, "grad_norm": 0.2259417176246643, "learning_rate": 1.9986962408796972e-05, "loss": 0.5676, "step": 318 }, { "epoch": 0.07115770689270577, "grad_norm": 0.2008512020111084, "learning_rate": 1.9986841996213998e-05, "loss": 0.5499, "step": 319 }, { "epoch": 0.07138077180459514, "grad_norm": 0.19798852503299713, "learning_rate": 1.99867210304969e-05, "loss": 0.564, "step": 320 }, { "epoch": 0.0716038367164845, "grad_norm": 0.21567919850349426, "learning_rate": 1.998659951165237e-05, "loss": 0.5701, "step": 321 }, { "epoch": 0.07182690162837385, "grad_norm": 0.21192528307437897, "learning_rate": 1.998647743968714e-05, "loss": 0.595, "step": 322 }, { "epoch": 0.07204996654026322, "grad_norm": 0.1853858083486557, "learning_rate": 1.9986354814607974e-05, "loss": 0.5546, "step": 323 }, { "epoch": 0.07227303145215258, "grad_norm": 0.19545771181583405, "learning_rate": 1.998623163642166e-05, "loss": 0.5602, "step": 324 }, { "epoch": 0.07249609636404193, "grad_norm": 0.20142099261283875, "learning_rate": 1.998610790513502e-05, "loss": 0.5589, "step": 325 }, { "epoch": 0.0727191612759313, "grad_norm": 0.1894591748714447, "learning_rate": 1.9985983620754914e-05, "loss": 0.5855, "step": 326 }, { "epoch": 0.07294222618782066, "grad_norm": 0.19726653397083282, "learning_rate": 1.998585878328822e-05, "loss": 0.559, "step": 327 }, { "epoch": 0.07316529109971001, "grad_norm": 0.19552524387836456, "learning_rate": 1.998573339274185e-05, "loss": 0.5643, "step": 328 }, { "epoch": 0.07338835601159938, "grad_norm": 0.2093891054391861, "learning_rate": 1.9985607449122754e-05, "loss": 0.5661, "step": 329 }, { "epoch": 0.07361142092348874, "grad_norm": 0.2397080659866333, "learning_rate": 1.9985480952437902e-05, "loss": 0.565, "step": 330 }, { "epoch": 0.07383448583537809, "grad_norm": 0.21570439636707306, "learning_rate": 1.998535390269431e-05, "loss": 0.5505, "step": 331 }, { "epoch": 0.07405755074726746, "grad_norm": 0.18352028727531433, "learning_rate": 1.9985226299899006e-05, "loss": 0.5501, "step": 332 }, { "epoch": 0.07428061565915682, "grad_norm": 0.20006653666496277, "learning_rate": 1.9985098144059058e-05, "loss": 0.5559, "step": 333 }, { "epoch": 0.07450368057104617, "grad_norm": 0.22780755162239075, "learning_rate": 1.998496943518157e-05, "loss": 0.5886, "step": 334 }, { "epoch": 0.07472674548293554, "grad_norm": 0.18735693395137787, "learning_rate": 1.9984840173273662e-05, "loss": 0.5714, "step": 335 }, { "epoch": 0.0749498103948249, "grad_norm": 0.1882167011499405, "learning_rate": 1.99847103583425e-05, "loss": 0.5586, "step": 336 }, { "epoch": 0.07517287530671425, "grad_norm": 0.20951317250728607, "learning_rate": 1.9984579990395274e-05, "loss": 0.5286, "step": 337 }, { "epoch": 0.07539594021860362, "grad_norm": 0.2166478931903839, "learning_rate": 1.9984449069439197e-05, "loss": 0.5661, "step": 338 }, { "epoch": 0.07561900513049297, "grad_norm": 0.191757470369339, "learning_rate": 1.998431759548153e-05, "loss": 0.5431, "step": 339 }, { "epoch": 0.07584207004238233, "grad_norm": 0.2109043151140213, "learning_rate": 1.998418556852955e-05, "loss": 0.5642, "step": 340 }, { "epoch": 0.0760651349542717, "grad_norm": 0.2524346709251404, "learning_rate": 1.9984052988590573e-05, "loss": 0.5505, "step": 341 }, { "epoch": 0.07628819986616105, "grad_norm": 0.22443033754825592, "learning_rate": 1.9983919855671937e-05, "loss": 0.5454, "step": 342 }, { "epoch": 0.07651126477805041, "grad_norm": 0.1956530660390854, "learning_rate": 1.9983786169781017e-05, "loss": 0.5546, "step": 343 }, { "epoch": 0.07673432968993978, "grad_norm": 0.20710930228233337, "learning_rate": 1.9983651930925217e-05, "loss": 0.5554, "step": 344 }, { "epoch": 0.07695739460182913, "grad_norm": 0.2150043398141861, "learning_rate": 1.9983517139111978e-05, "loss": 0.5434, "step": 345 }, { "epoch": 0.07718045951371849, "grad_norm": 0.21276871860027313, "learning_rate": 1.998338179434876e-05, "loss": 0.5623, "step": 346 }, { "epoch": 0.07740352442560786, "grad_norm": 0.20740285515785217, "learning_rate": 1.998324589664306e-05, "loss": 0.5804, "step": 347 }, { "epoch": 0.0776265893374972, "grad_norm": 0.20988604426383972, "learning_rate": 1.99831094460024e-05, "loss": 0.5742, "step": 348 }, { "epoch": 0.07784965424938657, "grad_norm": 0.19990280270576477, "learning_rate": 1.9982972442434346e-05, "loss": 0.5397, "step": 349 }, { "epoch": 0.07807271916127594, "grad_norm": 0.295187383890152, "learning_rate": 1.9982834885946482e-05, "loss": 0.5713, "step": 350 }, { "epoch": 0.07829578407316529, "grad_norm": 0.20176567137241364, "learning_rate": 1.998269677654643e-05, "loss": 0.5548, "step": 351 }, { "epoch": 0.07851884898505465, "grad_norm": 0.20890064537525177, "learning_rate": 1.9982558114241837e-05, "loss": 0.5739, "step": 352 }, { "epoch": 0.07874191389694402, "grad_norm": 0.2795037031173706, "learning_rate": 1.998241889904038e-05, "loss": 0.5343, "step": 353 }, { "epoch": 0.07896497880883337, "grad_norm": 0.22098244726657867, "learning_rate": 1.9982279130949775e-05, "loss": 0.517, "step": 354 }, { "epoch": 0.07918804372072273, "grad_norm": 0.20403707027435303, "learning_rate": 1.998213880997776e-05, "loss": 0.5599, "step": 355 }, { "epoch": 0.0794111086326121, "grad_norm": 0.2201310694217682, "learning_rate": 1.9981997936132107e-05, "loss": 0.5348, "step": 356 }, { "epoch": 0.07963417354450145, "grad_norm": 0.22818854451179504, "learning_rate": 1.998185650942062e-05, "loss": 0.551, "step": 357 }, { "epoch": 0.07985723845639081, "grad_norm": 0.23051077127456665, "learning_rate": 1.9981714529851127e-05, "loss": 0.5926, "step": 358 }, { "epoch": 0.08008030336828018, "grad_norm": 0.21485844254493713, "learning_rate": 1.99815719974315e-05, "loss": 0.5801, "step": 359 }, { "epoch": 0.08030336828016953, "grad_norm": 0.1935802698135376, "learning_rate": 1.998142891216963e-05, "loss": 0.5411, "step": 360 }, { "epoch": 0.08052643319205889, "grad_norm": 0.1971837282180786, "learning_rate": 1.998128527407344e-05, "loss": 0.5805, "step": 361 }, { "epoch": 0.08074949810394826, "grad_norm": 0.20717322826385498, "learning_rate": 1.9981141083150886e-05, "loss": 0.5493, "step": 362 }, { "epoch": 0.0809725630158376, "grad_norm": 0.2011585831642151, "learning_rate": 1.9980996339409957e-05, "loss": 0.5579, "step": 363 }, { "epoch": 0.08119562792772697, "grad_norm": 0.19211354851722717, "learning_rate": 1.9980851042858664e-05, "loss": 0.5383, "step": 364 }, { "epoch": 0.08141869283961632, "grad_norm": 0.21699954569339752, "learning_rate": 1.998070519350506e-05, "loss": 0.5524, "step": 365 }, { "epoch": 0.08164175775150569, "grad_norm": 0.20210997760295868, "learning_rate": 1.9980558791357222e-05, "loss": 0.5738, "step": 366 }, { "epoch": 0.08186482266339505, "grad_norm": 0.18855418264865875, "learning_rate": 1.9980411836423256e-05, "loss": 0.547, "step": 367 }, { "epoch": 0.0820878875752844, "grad_norm": 0.21640720963478088, "learning_rate": 1.9980264328711305e-05, "loss": 0.5964, "step": 368 }, { "epoch": 0.08231095248717377, "grad_norm": 0.2280052751302719, "learning_rate": 1.9980116268229536e-05, "loss": 0.561, "step": 369 }, { "epoch": 0.08253401739906313, "grad_norm": 0.1860683411359787, "learning_rate": 1.9979967654986155e-05, "loss": 0.545, "step": 370 }, { "epoch": 0.08275708231095248, "grad_norm": 0.19923582673072815, "learning_rate": 1.9979818488989383e-05, "loss": 0.5421, "step": 371 }, { "epoch": 0.08298014722284185, "grad_norm": 0.23478665947914124, "learning_rate": 1.997966877024749e-05, "loss": 0.5652, "step": 372 }, { "epoch": 0.08320321213473121, "grad_norm": 0.2126697599887848, "learning_rate": 1.9979518498768768e-05, "loss": 0.5599, "step": 373 }, { "epoch": 0.08342627704662056, "grad_norm": 0.20854973793029785, "learning_rate": 1.9979367674561535e-05, "loss": 0.5425, "step": 374 }, { "epoch": 0.08364934195850993, "grad_norm": 0.20865046977996826, "learning_rate": 1.9979216297634148e-05, "loss": 0.5529, "step": 375 }, { "epoch": 0.08387240687039929, "grad_norm": 0.24369299411773682, "learning_rate": 1.997906436799499e-05, "loss": 0.535, "step": 376 }, { "epoch": 0.08409547178228864, "grad_norm": 0.2064153105020523, "learning_rate": 1.9978911885652475e-05, "loss": 0.5488, "step": 377 }, { "epoch": 0.084318536694178, "grad_norm": 0.21306705474853516, "learning_rate": 1.997875885061505e-05, "loss": 0.5627, "step": 378 }, { "epoch": 0.08454160160606737, "grad_norm": 0.19575418531894684, "learning_rate": 1.9978605262891196e-05, "loss": 0.5565, "step": 379 }, { "epoch": 0.08476466651795672, "grad_norm": 0.19842910766601562, "learning_rate": 1.9978451122489412e-05, "loss": 0.5709, "step": 380 }, { "epoch": 0.08498773142984609, "grad_norm": 0.20483329892158508, "learning_rate": 1.9978296429418237e-05, "loss": 0.5773, "step": 381 }, { "epoch": 0.08521079634173545, "grad_norm": 0.22734715044498444, "learning_rate": 1.997814118368624e-05, "loss": 0.5469, "step": 382 }, { "epoch": 0.0854338612536248, "grad_norm": 0.19727206230163574, "learning_rate": 1.997798538530202e-05, "loss": 0.5541, "step": 383 }, { "epoch": 0.08565692616551417, "grad_norm": 0.19417184591293335, "learning_rate": 1.9977829034274205e-05, "loss": 0.5513, "step": 384 }, { "epoch": 0.08587999107740353, "grad_norm": 0.2037704885005951, "learning_rate": 1.9977672130611454e-05, "loss": 0.5567, "step": 385 }, { "epoch": 0.08610305598929288, "grad_norm": 0.18121762573719025, "learning_rate": 1.997751467432246e-05, "loss": 0.5357, "step": 386 }, { "epoch": 0.08632612090118225, "grad_norm": 0.27105584740638733, "learning_rate": 1.997735666541594e-05, "loss": 0.5654, "step": 387 }, { "epoch": 0.08654918581307161, "grad_norm": 0.20133094489574432, "learning_rate": 1.997719810390065e-05, "loss": 0.5349, "step": 388 }, { "epoch": 0.08677225072496096, "grad_norm": 0.18842002749443054, "learning_rate": 1.997703898978537e-05, "loss": 0.5352, "step": 389 }, { "epoch": 0.08699531563685033, "grad_norm": 0.2452889233827591, "learning_rate": 1.9976879323078913e-05, "loss": 0.5899, "step": 390 }, { "epoch": 0.08721838054873968, "grad_norm": 0.2256636619567871, "learning_rate": 1.9976719103790118e-05, "loss": 0.5115, "step": 391 }, { "epoch": 0.08744144546062904, "grad_norm": 0.1972501426935196, "learning_rate": 1.9976558331927868e-05, "loss": 0.5523, "step": 392 }, { "epoch": 0.0876645103725184, "grad_norm": 0.2026263177394867, "learning_rate": 1.9976397007501062e-05, "loss": 0.5758, "step": 393 }, { "epoch": 0.08788757528440776, "grad_norm": 0.20572836697101593, "learning_rate": 1.9976235130518632e-05, "loss": 0.5429, "step": 394 }, { "epoch": 0.08811064019629712, "grad_norm": 0.18851211667060852, "learning_rate": 1.997607270098955e-05, "loss": 0.5616, "step": 395 }, { "epoch": 0.08833370510818649, "grad_norm": 0.20447294414043427, "learning_rate": 1.9975909718922806e-05, "loss": 0.5527, "step": 396 }, { "epoch": 0.08855677002007584, "grad_norm": 0.19188085198402405, "learning_rate": 1.997574618432744e-05, "loss": 0.558, "step": 397 }, { "epoch": 0.0887798349319652, "grad_norm": 0.19854480028152466, "learning_rate": 1.997558209721249e-05, "loss": 0.5756, "step": 398 }, { "epoch": 0.08900289984385457, "grad_norm": 0.19832177460193634, "learning_rate": 1.997541745758706e-05, "loss": 0.569, "step": 399 }, { "epoch": 0.08922596475574392, "grad_norm": 0.20165055990219116, "learning_rate": 1.9975252265460265e-05, "loss": 0.5376, "step": 400 }, { "epoch": 0.08944902966763328, "grad_norm": 0.18997038900852203, "learning_rate": 1.997508652084125e-05, "loss": 0.5745, "step": 401 }, { "epoch": 0.08967209457952265, "grad_norm": 0.19646713137626648, "learning_rate": 1.9974920223739195e-05, "loss": 0.5454, "step": 402 }, { "epoch": 0.089895159491412, "grad_norm": 0.1976052224636078, "learning_rate": 1.997475337416332e-05, "loss": 0.537, "step": 403 }, { "epoch": 0.09011822440330136, "grad_norm": 0.2045581340789795, "learning_rate": 1.9974585972122857e-05, "loss": 0.5405, "step": 404 }, { "epoch": 0.09034128931519073, "grad_norm": 0.18631087243556976, "learning_rate": 1.9974418017627076e-05, "loss": 0.6034, "step": 405 }, { "epoch": 0.09056435422708008, "grad_norm": 0.2022215723991394, "learning_rate": 1.9974249510685285e-05, "loss": 0.5682, "step": 406 }, { "epoch": 0.09078741913896944, "grad_norm": 0.19569364190101624, "learning_rate": 1.9974080451306816e-05, "loss": 0.5739, "step": 407 }, { "epoch": 0.0910104840508588, "grad_norm": 0.20746709406375885, "learning_rate": 1.9973910839501035e-05, "loss": 0.55, "step": 408 }, { "epoch": 0.09123354896274816, "grad_norm": 0.21757768094539642, "learning_rate": 1.997374067527733e-05, "loss": 0.5404, "step": 409 }, { "epoch": 0.09145661387463752, "grad_norm": 0.1881277859210968, "learning_rate": 1.997356995864513e-05, "loss": 0.5435, "step": 410 }, { "epoch": 0.09167967878652689, "grad_norm": 0.1868787705898285, "learning_rate": 1.9973398689613892e-05, "loss": 0.5373, "step": 411 }, { "epoch": 0.09190274369841624, "grad_norm": 0.1938547044992447, "learning_rate": 1.9973226868193096e-05, "loss": 0.5846, "step": 412 }, { "epoch": 0.0921258086103056, "grad_norm": 0.19920659065246582, "learning_rate": 1.9973054494392265e-05, "loss": 0.5615, "step": 413 }, { "epoch": 0.09234887352219495, "grad_norm": 0.18801911175251007, "learning_rate": 1.997288156822094e-05, "loss": 0.5675, "step": 414 }, { "epoch": 0.09257193843408432, "grad_norm": 0.1990530639886856, "learning_rate": 1.9972708089688705e-05, "loss": 0.5626, "step": 415 }, { "epoch": 0.09279500334597368, "grad_norm": 0.18555153906345367, "learning_rate": 1.9972534058805163e-05, "loss": 0.557, "step": 416 }, { "epoch": 0.09301806825786303, "grad_norm": 0.20318473875522614, "learning_rate": 1.9972359475579953e-05, "loss": 0.5196, "step": 417 }, { "epoch": 0.0932411331697524, "grad_norm": 0.20757220685482025, "learning_rate": 1.997218434002275e-05, "loss": 0.5454, "step": 418 }, { "epoch": 0.09346419808164176, "grad_norm": 0.20588824152946472, "learning_rate": 1.997200865214325e-05, "loss": 0.5347, "step": 419 }, { "epoch": 0.09368726299353111, "grad_norm": 0.20199880003929138, "learning_rate": 1.9971832411951186e-05, "loss": 0.5531, "step": 420 }, { "epoch": 0.09391032790542048, "grad_norm": 0.19605985283851624, "learning_rate": 1.9971655619456313e-05, "loss": 0.5263, "step": 421 }, { "epoch": 0.09413339281730984, "grad_norm": 0.19716261327266693, "learning_rate": 1.997147827466843e-05, "loss": 0.5722, "step": 422 }, { "epoch": 0.09435645772919919, "grad_norm": 0.20400184392929077, "learning_rate": 1.997130037759736e-05, "loss": 0.5629, "step": 423 }, { "epoch": 0.09457952264108856, "grad_norm": 0.203161358833313, "learning_rate": 1.997112192825295e-05, "loss": 0.5538, "step": 424 }, { "epoch": 0.09480258755297792, "grad_norm": 0.20870056748390198, "learning_rate": 1.997094292664509e-05, "loss": 0.5548, "step": 425 }, { "epoch": 0.09502565246486727, "grad_norm": 0.1856376975774765, "learning_rate": 1.9970763372783687e-05, "loss": 0.5193, "step": 426 }, { "epoch": 0.09524871737675664, "grad_norm": 0.1954246610403061, "learning_rate": 1.997058326667869e-05, "loss": 0.5336, "step": 427 }, { "epoch": 0.095471782288646, "grad_norm": 0.1996607482433319, "learning_rate": 1.9970402608340076e-05, "loss": 0.5708, "step": 428 }, { "epoch": 0.09569484720053535, "grad_norm": 0.21878129243850708, "learning_rate": 1.9970221397777848e-05, "loss": 0.5794, "step": 429 }, { "epoch": 0.09591791211242472, "grad_norm": 0.19142527878284454, "learning_rate": 1.9970039635002044e-05, "loss": 0.5221, "step": 430 }, { "epoch": 0.09614097702431408, "grad_norm": 0.18871352076530457, "learning_rate": 1.996985732002273e-05, "loss": 0.5321, "step": 431 }, { "epoch": 0.09636404193620343, "grad_norm": 0.1942790001630783, "learning_rate": 1.996967445285001e-05, "loss": 0.5269, "step": 432 }, { "epoch": 0.0965871068480928, "grad_norm": 0.19900670647621155, "learning_rate": 1.9969491033494e-05, "loss": 0.6048, "step": 433 }, { "epoch": 0.09681017175998216, "grad_norm": 0.193121075630188, "learning_rate": 1.9969307061964873e-05, "loss": 0.5613, "step": 434 }, { "epoch": 0.09703323667187151, "grad_norm": 0.1859043687582016, "learning_rate": 1.9969122538272807e-05, "loss": 0.5657, "step": 435 }, { "epoch": 0.09725630158376088, "grad_norm": 0.18597924709320068, "learning_rate": 1.9968937462428028e-05, "loss": 0.5336, "step": 436 }, { "epoch": 0.09747936649565024, "grad_norm": 0.17796443402767181, "learning_rate": 1.9968751834440783e-05, "loss": 0.5611, "step": 437 }, { "epoch": 0.09770243140753959, "grad_norm": 0.1958586871623993, "learning_rate": 1.9968565654321356e-05, "loss": 0.5377, "step": 438 }, { "epoch": 0.09792549631942896, "grad_norm": 0.21395692229270935, "learning_rate": 1.996837892208006e-05, "loss": 0.5347, "step": 439 }, { "epoch": 0.09814856123131831, "grad_norm": 0.19348031282424927, "learning_rate": 1.9968191637727235e-05, "loss": 0.5576, "step": 440 }, { "epoch": 0.09837162614320767, "grad_norm": 0.19345401227474213, "learning_rate": 1.9968003801273253e-05, "loss": 0.5697, "step": 441 }, { "epoch": 0.09859469105509704, "grad_norm": 0.18203531205654144, "learning_rate": 1.9967815412728523e-05, "loss": 0.5416, "step": 442 }, { "epoch": 0.09881775596698639, "grad_norm": 0.20109198987483978, "learning_rate": 1.9967626472103472e-05, "loss": 0.5514, "step": 443 }, { "epoch": 0.09904082087887575, "grad_norm": 0.18818293511867523, "learning_rate": 1.996743697940857e-05, "loss": 0.5535, "step": 444 }, { "epoch": 0.09926388579076512, "grad_norm": 0.18718019127845764, "learning_rate": 1.996724693465431e-05, "loss": 0.5595, "step": 445 }, { "epoch": 0.09948695070265447, "grad_norm": 0.18856103718280792, "learning_rate": 1.9967056337851217e-05, "loss": 0.5364, "step": 446 }, { "epoch": 0.09971001561454383, "grad_norm": 0.18582871556282043, "learning_rate": 1.996686518900985e-05, "loss": 0.5632, "step": 447 }, { "epoch": 0.0999330805264332, "grad_norm": 0.19166669249534607, "learning_rate": 1.9966673488140794e-05, "loss": 0.5727, "step": 448 }, { "epoch": 0.10015614543832255, "grad_norm": 0.18631108105182648, "learning_rate": 1.9966481235254667e-05, "loss": 0.5472, "step": 449 }, { "epoch": 0.10037921035021191, "grad_norm": 0.18970704078674316, "learning_rate": 1.996628843036212e-05, "loss": 0.5355, "step": 450 }, { "epoch": 0.10060227526210128, "grad_norm": 0.18836888670921326, "learning_rate": 1.9966095073473828e-05, "loss": 0.5309, "step": 451 }, { "epoch": 0.10082534017399063, "grad_norm": 0.1924007087945938, "learning_rate": 1.99659011646005e-05, "loss": 0.5213, "step": 452 }, { "epoch": 0.10104840508587999, "grad_norm": 0.19710196554660797, "learning_rate": 1.996570670375288e-05, "loss": 0.5333, "step": 453 }, { "epoch": 0.10127146999776936, "grad_norm": 0.1823819875717163, "learning_rate": 1.9965511690941737e-05, "loss": 0.5378, "step": 454 }, { "epoch": 0.1014945349096587, "grad_norm": 0.21200565993785858, "learning_rate": 1.9965316126177867e-05, "loss": 0.5594, "step": 455 }, { "epoch": 0.10171759982154807, "grad_norm": 0.18618617951869965, "learning_rate": 1.9965120009472106e-05, "loss": 0.5317, "step": 456 }, { "epoch": 0.10194066473343744, "grad_norm": 0.19600717723369598, "learning_rate": 1.996492334083532e-05, "loss": 0.5612, "step": 457 }, { "epoch": 0.10216372964532679, "grad_norm": 0.1988876461982727, "learning_rate": 1.9964726120278394e-05, "loss": 0.5709, "step": 458 }, { "epoch": 0.10238679455721615, "grad_norm": 0.18938012421131134, "learning_rate": 1.9964528347812255e-05, "loss": 0.5413, "step": 459 }, { "epoch": 0.10260985946910552, "grad_norm": 0.18799841403961182, "learning_rate": 1.9964330023447854e-05, "loss": 0.5534, "step": 460 }, { "epoch": 0.10283292438099487, "grad_norm": 0.229729562997818, "learning_rate": 1.9964131147196185e-05, "loss": 0.5471, "step": 461 }, { "epoch": 0.10305598929288423, "grad_norm": 0.19921550154685974, "learning_rate": 1.9963931719068253e-05, "loss": 0.5419, "step": 462 }, { "epoch": 0.1032790542047736, "grad_norm": 0.19583772122859955, "learning_rate": 1.9963731739075106e-05, "loss": 0.5412, "step": 463 }, { "epoch": 0.10350211911666295, "grad_norm": 0.19680215418338776, "learning_rate": 1.996353120722782e-05, "loss": 0.5477, "step": 464 }, { "epoch": 0.10372518402855231, "grad_norm": 0.20173701643943787, "learning_rate": 1.9963330123537507e-05, "loss": 0.5523, "step": 465 }, { "epoch": 0.10394824894044166, "grad_norm": 0.23238573968410492, "learning_rate": 1.9963128488015294e-05, "loss": 0.559, "step": 466 }, { "epoch": 0.10417131385233103, "grad_norm": 0.22626003623008728, "learning_rate": 1.996292630067236e-05, "loss": 0.526, "step": 467 }, { "epoch": 0.10439437876422039, "grad_norm": 0.21178299188613892, "learning_rate": 1.9962723561519893e-05, "loss": 0.5301, "step": 468 }, { "epoch": 0.10461744367610974, "grad_norm": 0.1937963217496872, "learning_rate": 1.996252027056913e-05, "loss": 0.5449, "step": 469 }, { "epoch": 0.1048405085879991, "grad_norm": 0.18095934391021729, "learning_rate": 1.996231642783133e-05, "loss": 0.5676, "step": 470 }, { "epoch": 0.10506357349988847, "grad_norm": 0.2064565271139145, "learning_rate": 1.9962112033317776e-05, "loss": 0.5157, "step": 471 }, { "epoch": 0.10528663841177782, "grad_norm": 0.1964641511440277, "learning_rate": 1.9961907087039796e-05, "loss": 0.6127, "step": 472 }, { "epoch": 0.10550970332366719, "grad_norm": 0.17736569046974182, "learning_rate": 1.996170158900874e-05, "loss": 0.545, "step": 473 }, { "epoch": 0.10573276823555655, "grad_norm": 0.1836784929037094, "learning_rate": 1.9961495539235985e-05, "loss": 0.5525, "step": 474 }, { "epoch": 0.1059558331474459, "grad_norm": 0.2029949426651001, "learning_rate": 1.996128893773295e-05, "loss": 0.5558, "step": 475 }, { "epoch": 0.10617889805933527, "grad_norm": 0.207282155752182, "learning_rate": 1.9961081784511073e-05, "loss": 0.5789, "step": 476 }, { "epoch": 0.10640196297122463, "grad_norm": 0.20277360081672668, "learning_rate": 1.9960874079581828e-05, "loss": 0.5555, "step": 477 }, { "epoch": 0.10662502788311398, "grad_norm": 0.21180874109268188, "learning_rate": 1.996066582295672e-05, "loss": 0.574, "step": 478 }, { "epoch": 0.10684809279500335, "grad_norm": 0.22067199647426605, "learning_rate": 1.996045701464729e-05, "loss": 0.5641, "step": 479 }, { "epoch": 0.10707115770689271, "grad_norm": 0.2735600471496582, "learning_rate": 1.9960247654665088e-05, "loss": 0.5473, "step": 480 }, { "epoch": 0.10729422261878206, "grad_norm": 0.19656887650489807, "learning_rate": 1.9960037743021723e-05, "loss": 0.5614, "step": 481 }, { "epoch": 0.10751728753067143, "grad_norm": 0.19436019659042358, "learning_rate": 1.9959827279728815e-05, "loss": 0.5554, "step": 482 }, { "epoch": 0.10774035244256079, "grad_norm": 0.1874009072780609, "learning_rate": 1.9959616264798022e-05, "loss": 0.5723, "step": 483 }, { "epoch": 0.10796341735445014, "grad_norm": 0.2946108877658844, "learning_rate": 1.9959404698241037e-05, "loss": 0.5572, "step": 484 }, { "epoch": 0.1081864822663395, "grad_norm": 0.1900031566619873, "learning_rate": 1.9959192580069567e-05, "loss": 0.5629, "step": 485 }, { "epoch": 0.10840954717822887, "grad_norm": 0.2330961972475052, "learning_rate": 1.9958979910295367e-05, "loss": 0.5725, "step": 486 }, { "epoch": 0.10863261209011822, "grad_norm": 0.23831723630428314, "learning_rate": 1.9958766688930215e-05, "loss": 0.5549, "step": 487 }, { "epoch": 0.10885567700200759, "grad_norm": 0.2095993012189865, "learning_rate": 1.9958552915985923e-05, "loss": 0.541, "step": 488 }, { "epoch": 0.10907874191389694, "grad_norm": 0.18837721645832062, "learning_rate": 1.9958338591474327e-05, "loss": 0.5245, "step": 489 }, { "epoch": 0.1093018068257863, "grad_norm": 0.19427752494812012, "learning_rate": 1.99581237154073e-05, "loss": 0.5442, "step": 490 }, { "epoch": 0.10952487173767567, "grad_norm": 0.19493944942951202, "learning_rate": 1.9957908287796743e-05, "loss": 0.5559, "step": 491 }, { "epoch": 0.10974793664956502, "grad_norm": 0.18494555354118347, "learning_rate": 1.9957692308654586e-05, "loss": 0.5427, "step": 492 }, { "epoch": 0.10997100156145438, "grad_norm": 0.19978608191013336, "learning_rate": 1.9957475777992794e-05, "loss": 0.5391, "step": 493 }, { "epoch": 0.11019406647334375, "grad_norm": 0.18339702486991882, "learning_rate": 1.9957258695823358e-05, "loss": 0.5454, "step": 494 }, { "epoch": 0.1104171313852331, "grad_norm": 0.18613296747207642, "learning_rate": 1.99570410621583e-05, "loss": 0.5371, "step": 495 }, { "epoch": 0.11064019629712246, "grad_norm": 0.19776608049869537, "learning_rate": 1.9956822877009676e-05, "loss": 0.5405, "step": 496 }, { "epoch": 0.11086326120901183, "grad_norm": 0.1891903132200241, "learning_rate": 1.9956604140389574e-05, "loss": 0.5755, "step": 497 }, { "epoch": 0.11108632612090118, "grad_norm": 0.20654836297035217, "learning_rate": 1.9956384852310102e-05, "loss": 0.5471, "step": 498 }, { "epoch": 0.11130939103279054, "grad_norm": 0.18682821094989777, "learning_rate": 1.995616501278341e-05, "loss": 0.5479, "step": 499 }, { "epoch": 0.1115324559446799, "grad_norm": 0.20868845283985138, "learning_rate": 1.995594462182167e-05, "loss": 0.5405, "step": 500 }, { "epoch": 0.11175552085656926, "grad_norm": 0.20793192088603973, "learning_rate": 1.9955723679437093e-05, "loss": 0.5468, "step": 501 }, { "epoch": 0.11197858576845862, "grad_norm": 0.18734110891819, "learning_rate": 1.9955502185641915e-05, "loss": 0.5336, "step": 502 }, { "epoch": 0.11220165068034799, "grad_norm": 0.18563862144947052, "learning_rate": 1.9955280140448404e-05, "loss": 0.549, "step": 503 }, { "epoch": 0.11242471559223734, "grad_norm": 0.19853924214839935, "learning_rate": 1.9955057543868858e-05, "loss": 0.5494, "step": 504 }, { "epoch": 0.1126477805041267, "grad_norm": 0.2054789960384369, "learning_rate": 1.9954834395915604e-05, "loss": 0.6006, "step": 505 }, { "epoch": 0.11287084541601607, "grad_norm": 0.1880260407924652, "learning_rate": 1.9954610696601e-05, "loss": 0.5664, "step": 506 }, { "epoch": 0.11309391032790542, "grad_norm": 0.20359358191490173, "learning_rate": 1.9954386445937444e-05, "loss": 0.5387, "step": 507 }, { "epoch": 0.11331697523979478, "grad_norm": 0.20446328818798065, "learning_rate": 1.995416164393735e-05, "loss": 0.6078, "step": 508 }, { "epoch": 0.11354004015168415, "grad_norm": 0.20027638971805573, "learning_rate": 1.9953936290613166e-05, "loss": 0.4875, "step": 509 }, { "epoch": 0.1137631050635735, "grad_norm": 0.19979824125766754, "learning_rate": 1.9953710385977382e-05, "loss": 0.5599, "step": 510 }, { "epoch": 0.11398616997546286, "grad_norm": 0.17714561522006989, "learning_rate": 1.9953483930042503e-05, "loss": 0.536, "step": 511 }, { "epoch": 0.11420923488735223, "grad_norm": 0.21002604067325592, "learning_rate": 1.9953256922821075e-05, "loss": 0.5388, "step": 512 }, { "epoch": 0.11443229979924158, "grad_norm": 0.19017495214939117, "learning_rate": 1.995302936432567e-05, "loss": 0.5474, "step": 513 }, { "epoch": 0.11465536471113094, "grad_norm": 0.1896909773349762, "learning_rate": 1.995280125456889e-05, "loss": 0.5577, "step": 514 }, { "epoch": 0.11487842962302029, "grad_norm": 0.20575307309627533, "learning_rate": 1.9952572593563375e-05, "loss": 0.5204, "step": 515 }, { "epoch": 0.11510149453490966, "grad_norm": 0.19565477967262268, "learning_rate": 1.9952343381321785e-05, "loss": 0.5971, "step": 516 }, { "epoch": 0.11532455944679902, "grad_norm": 0.1984451860189438, "learning_rate": 1.995211361785681e-05, "loss": 0.5531, "step": 517 }, { "epoch": 0.11554762435868837, "grad_norm": 0.18850469589233398, "learning_rate": 1.9951883303181184e-05, "loss": 0.5419, "step": 518 }, { "epoch": 0.11577068927057774, "grad_norm": 0.22787964344024658, "learning_rate": 1.9951652437307664e-05, "loss": 0.5403, "step": 519 }, { "epoch": 0.1159937541824671, "grad_norm": 0.29980340600013733, "learning_rate": 1.995142102024903e-05, "loss": 0.5365, "step": 520 }, { "epoch": 0.11621681909435645, "grad_norm": 0.17774684727191925, "learning_rate": 1.995118905201811e-05, "loss": 0.545, "step": 521 }, { "epoch": 0.11643988400624582, "grad_norm": 0.20445318520069122, "learning_rate": 1.995095653262774e-05, "loss": 0.5883, "step": 522 }, { "epoch": 0.11666294891813518, "grad_norm": 0.1800977885723114, "learning_rate": 1.9950723462090803e-05, "loss": 0.5579, "step": 523 }, { "epoch": 0.11688601383002453, "grad_norm": 0.18807321786880493, "learning_rate": 1.9950489840420207e-05, "loss": 0.5295, "step": 524 }, { "epoch": 0.1171090787419139, "grad_norm": 0.17663832008838654, "learning_rate": 1.9950255667628894e-05, "loss": 0.5377, "step": 525 }, { "epoch": 0.11733214365380326, "grad_norm": 0.18556755781173706, "learning_rate": 1.9950020943729834e-05, "loss": 0.5314, "step": 526 }, { "epoch": 0.11755520856569261, "grad_norm": 0.18376345932483673, "learning_rate": 1.994978566873602e-05, "loss": 0.5383, "step": 527 }, { "epoch": 0.11777827347758198, "grad_norm": 0.18771792948246002, "learning_rate": 1.9949549842660495e-05, "loss": 0.5273, "step": 528 }, { "epoch": 0.11800133838947134, "grad_norm": 0.1931321620941162, "learning_rate": 1.9949313465516312e-05, "loss": 0.5692, "step": 529 }, { "epoch": 0.11822440330136069, "grad_norm": 0.18806159496307373, "learning_rate": 1.9949076537316566e-05, "loss": 0.5375, "step": 530 }, { "epoch": 0.11844746821325006, "grad_norm": 0.18746712803840637, "learning_rate": 1.9948839058074383e-05, "loss": 0.5418, "step": 531 }, { "epoch": 0.11867053312513942, "grad_norm": 0.18869024515151978, "learning_rate": 1.9948601027802908e-05, "loss": 0.5059, "step": 532 }, { "epoch": 0.11889359803702877, "grad_norm": 0.20482954382896423, "learning_rate": 1.994836244651533e-05, "loss": 0.5359, "step": 533 }, { "epoch": 0.11911666294891814, "grad_norm": 0.17584972083568573, "learning_rate": 1.9948123314224862e-05, "loss": 0.5221, "step": 534 }, { "epoch": 0.1193397278608075, "grad_norm": 0.1878448724746704, "learning_rate": 1.994788363094475e-05, "loss": 0.5358, "step": 535 }, { "epoch": 0.11956279277269685, "grad_norm": 0.17549312114715576, "learning_rate": 1.9947643396688266e-05, "loss": 0.5391, "step": 536 }, { "epoch": 0.11978585768458622, "grad_norm": 0.18577958643436432, "learning_rate": 1.9947402611468714e-05, "loss": 0.5606, "step": 537 }, { "epoch": 0.12000892259647558, "grad_norm": 0.17562542855739594, "learning_rate": 1.994716127529944e-05, "loss": 0.5186, "step": 538 }, { "epoch": 0.12023198750836493, "grad_norm": 0.18019866943359375, "learning_rate": 1.9946919388193803e-05, "loss": 0.524, "step": 539 }, { "epoch": 0.1204550524202543, "grad_norm": 0.18850797414779663, "learning_rate": 1.99466769501652e-05, "loss": 0.5496, "step": 540 }, { "epoch": 0.12067811733214365, "grad_norm": 0.1817145198583603, "learning_rate": 1.9946433961227062e-05, "loss": 0.5303, "step": 541 }, { "epoch": 0.12090118224403301, "grad_norm": 0.2111491858959198, "learning_rate": 1.9946190421392845e-05, "loss": 0.5526, "step": 542 }, { "epoch": 0.12112424715592238, "grad_norm": 0.294716477394104, "learning_rate": 1.9945946330676036e-05, "loss": 0.5172, "step": 543 }, { "epoch": 0.12134731206781173, "grad_norm": 0.21202930808067322, "learning_rate": 1.994570168909016e-05, "loss": 0.5295, "step": 544 }, { "epoch": 0.12157037697970109, "grad_norm": 0.18554560840129852, "learning_rate": 1.9945456496648763e-05, "loss": 0.5347, "step": 545 }, { "epoch": 0.12179344189159046, "grad_norm": 0.18445701897144318, "learning_rate": 1.9945210753365426e-05, "loss": 0.5433, "step": 546 }, { "epoch": 0.12201650680347981, "grad_norm": 0.2190207540988922, "learning_rate": 1.9944964459253757e-05, "loss": 0.5342, "step": 547 }, { "epoch": 0.12223957171536917, "grad_norm": 0.19571848213672638, "learning_rate": 1.99447176143274e-05, "loss": 0.5675, "step": 548 }, { "epoch": 0.12246263662725854, "grad_norm": 0.1853405386209488, "learning_rate": 1.994447021860003e-05, "loss": 0.5857, "step": 549 }, { "epoch": 0.12268570153914789, "grad_norm": 0.19547483325004578, "learning_rate": 1.9944222272085344e-05, "loss": 0.5208, "step": 550 }, { "epoch": 0.12290876645103725, "grad_norm": 0.20552557706832886, "learning_rate": 1.994397377479708e-05, "loss": 0.5518, "step": 551 }, { "epoch": 0.12313183136292662, "grad_norm": 0.21979011595249176, "learning_rate": 1.9943724726748996e-05, "loss": 0.5658, "step": 552 }, { "epoch": 0.12335489627481597, "grad_norm": 0.18478159606456757, "learning_rate": 1.994347512795489e-05, "loss": 0.5599, "step": 553 }, { "epoch": 0.12357796118670533, "grad_norm": 0.20098286867141724, "learning_rate": 1.9943224978428582e-05, "loss": 0.5455, "step": 554 }, { "epoch": 0.1238010260985947, "grad_norm": 0.2217499017715454, "learning_rate": 1.994297427818393e-05, "loss": 0.5512, "step": 555 }, { "epoch": 0.12402409101048405, "grad_norm": 0.17720763385295868, "learning_rate": 1.9942723027234817e-05, "loss": 0.5295, "step": 556 }, { "epoch": 0.12424715592237341, "grad_norm": 0.1762504130601883, "learning_rate": 1.9942471225595162e-05, "loss": 0.5361, "step": 557 }, { "epoch": 0.12447022083426278, "grad_norm": 0.2265862226486206, "learning_rate": 1.994221887327891e-05, "loss": 0.5669, "step": 558 }, { "epoch": 0.12469328574615213, "grad_norm": 0.19906531274318695, "learning_rate": 1.994196597030004e-05, "loss": 0.5302, "step": 559 }, { "epoch": 0.12491635065804149, "grad_norm": 0.17008185386657715, "learning_rate": 1.9941712516672553e-05, "loss": 0.5333, "step": 560 }, { "epoch": 0.12513941556993086, "grad_norm": 0.18816471099853516, "learning_rate": 1.994145851241049e-05, "loss": 0.5679, "step": 561 }, { "epoch": 0.12536248048182022, "grad_norm": 0.19027023017406464, "learning_rate": 1.9941203957527927e-05, "loss": 0.5507, "step": 562 }, { "epoch": 0.12558554539370956, "grad_norm": 0.17551641166210175, "learning_rate": 1.994094885203895e-05, "loss": 0.5311, "step": 563 }, { "epoch": 0.12580861030559892, "grad_norm": 0.1766747236251831, "learning_rate": 1.9940693195957696e-05, "loss": 0.5279, "step": 564 }, { "epoch": 0.1260316752174883, "grad_norm": 0.1826571673154831, "learning_rate": 1.9940436989298322e-05, "loss": 0.5195, "step": 565 }, { "epoch": 0.12625474012937765, "grad_norm": 0.18142053484916687, "learning_rate": 1.9940180232075025e-05, "loss": 0.5678, "step": 566 }, { "epoch": 0.12647780504126702, "grad_norm": 0.21495957672595978, "learning_rate": 1.993992292430202e-05, "loss": 0.5393, "step": 567 }, { "epoch": 0.12670086995315638, "grad_norm": 0.18905828893184662, "learning_rate": 1.9939665065993556e-05, "loss": 0.5507, "step": 568 }, { "epoch": 0.12692393486504572, "grad_norm": 0.19366420805454254, "learning_rate": 1.9939406657163916e-05, "loss": 0.5458, "step": 569 }, { "epoch": 0.12714699977693508, "grad_norm": 0.20456644892692566, "learning_rate": 1.9939147697827415e-05, "loss": 0.5532, "step": 570 }, { "epoch": 0.12737006468882445, "grad_norm": 0.20060132443904877, "learning_rate": 1.9938888187998397e-05, "loss": 0.5865, "step": 571 }, { "epoch": 0.1275931296007138, "grad_norm": 0.18896770477294922, "learning_rate": 1.9938628127691232e-05, "loss": 0.5372, "step": 572 }, { "epoch": 0.12781619451260318, "grad_norm": 0.1824081540107727, "learning_rate": 1.9938367516920323e-05, "loss": 0.5232, "step": 573 }, { "epoch": 0.12803925942449254, "grad_norm": 0.4999215602874756, "learning_rate": 1.993810635570011e-05, "loss": 0.5323, "step": 574 }, { "epoch": 0.12826232433638188, "grad_norm": 0.23097532987594604, "learning_rate": 1.993784464404505e-05, "loss": 0.544, "step": 575 }, { "epoch": 0.12848538924827124, "grad_norm": 0.1975661814212799, "learning_rate": 1.993758238196964e-05, "loss": 0.5411, "step": 576 }, { "epoch": 0.1287084541601606, "grad_norm": 0.18706879019737244, "learning_rate": 1.9937319569488414e-05, "loss": 0.5386, "step": 577 }, { "epoch": 0.12893151907204997, "grad_norm": 0.19232505559921265, "learning_rate": 1.993705620661592e-05, "loss": 0.5327, "step": 578 }, { "epoch": 0.12915458398393934, "grad_norm": 0.20714089274406433, "learning_rate": 1.9936792293366744e-05, "loss": 0.5381, "step": 579 }, { "epoch": 0.12937764889582867, "grad_norm": 0.18375153839588165, "learning_rate": 1.993652782975551e-05, "loss": 0.5664, "step": 580 }, { "epoch": 0.12960071380771804, "grad_norm": 0.19149671494960785, "learning_rate": 1.993626281579686e-05, "loss": 0.5265, "step": 581 }, { "epoch": 0.1298237787196074, "grad_norm": 0.19362886250019073, "learning_rate": 1.9935997251505473e-05, "loss": 0.517, "step": 582 }, { "epoch": 0.13004684363149677, "grad_norm": 0.19237902760505676, "learning_rate": 1.993573113689606e-05, "loss": 0.529, "step": 583 }, { "epoch": 0.13026990854338613, "grad_norm": 0.2098451405763626, "learning_rate": 1.9935464471983354e-05, "loss": 0.536, "step": 584 }, { "epoch": 0.1304929734552755, "grad_norm": 0.17832371592521667, "learning_rate": 1.993519725678213e-05, "loss": 0.528, "step": 585 }, { "epoch": 0.13071603836716483, "grad_norm": 0.19082553684711456, "learning_rate": 1.9934929491307194e-05, "loss": 0.5208, "step": 586 }, { "epoch": 0.1309391032790542, "grad_norm": 0.19741079211235046, "learning_rate": 1.9934661175573363e-05, "loss": 0.5605, "step": 587 }, { "epoch": 0.13116216819094356, "grad_norm": 0.17371487617492676, "learning_rate": 1.9934392309595504e-05, "loss": 0.5462, "step": 588 }, { "epoch": 0.13138523310283293, "grad_norm": 0.17552132904529572, "learning_rate": 1.9934122893388512e-05, "loss": 0.5343, "step": 589 }, { "epoch": 0.1316082980147223, "grad_norm": 0.19698654115200043, "learning_rate": 1.9933852926967305e-05, "loss": 0.5711, "step": 590 }, { "epoch": 0.13183136292661166, "grad_norm": 0.19045893847942352, "learning_rate": 1.993358241034684e-05, "loss": 0.5601, "step": 591 }, { "epoch": 0.132054427838501, "grad_norm": 0.17538850009441376, "learning_rate": 1.9933311343542094e-05, "loss": 0.5403, "step": 592 }, { "epoch": 0.13227749275039036, "grad_norm": 0.1708664894104004, "learning_rate": 1.9933039726568078e-05, "loss": 0.5346, "step": 593 }, { "epoch": 0.13250055766227972, "grad_norm": 0.1795656532049179, "learning_rate": 1.9932767559439844e-05, "loss": 0.5516, "step": 594 }, { "epoch": 0.1327236225741691, "grad_norm": 0.18116529285907745, "learning_rate": 1.9932494842172465e-05, "loss": 0.5311, "step": 595 }, { "epoch": 0.13294668748605845, "grad_norm": 0.20366770029067993, "learning_rate": 1.9932221574781043e-05, "loss": 0.5546, "step": 596 }, { "epoch": 0.13316975239794782, "grad_norm": 0.1805567592382431, "learning_rate": 1.9931947757280713e-05, "loss": 0.5754, "step": 597 }, { "epoch": 0.13339281730983715, "grad_norm": 0.1815255880355835, "learning_rate": 1.9931673389686642e-05, "loss": 0.5172, "step": 598 }, { "epoch": 0.13361588222172652, "grad_norm": 0.1993352770805359, "learning_rate": 1.9931398472014024e-05, "loss": 0.5483, "step": 599 }, { "epoch": 0.13383894713361588, "grad_norm": 0.1812392920255661, "learning_rate": 1.993112300427809e-05, "loss": 0.5418, "step": 600 }, { "epoch": 0.13406201204550525, "grad_norm": 0.23115472495555878, "learning_rate": 1.9930846986494098e-05, "loss": 0.5661, "step": 601 }, { "epoch": 0.1342850769573946, "grad_norm": 0.1799275279045105, "learning_rate": 1.9930570418677327e-05, "loss": 0.5536, "step": 602 }, { "epoch": 0.13450814186928395, "grad_norm": 0.19140706956386566, "learning_rate": 1.9930293300843103e-05, "loss": 0.5405, "step": 603 }, { "epoch": 0.1347312067811733, "grad_norm": 0.20953412353992462, "learning_rate": 1.993001563300677e-05, "loss": 0.5215, "step": 604 }, { "epoch": 0.13495427169306268, "grad_norm": 0.18991845846176147, "learning_rate": 1.992973741518371e-05, "loss": 0.5577, "step": 605 }, { "epoch": 0.13517733660495204, "grad_norm": 0.18286292254924774, "learning_rate": 1.9929458647389333e-05, "loss": 0.5531, "step": 606 }, { "epoch": 0.1354004015168414, "grad_norm": 0.1848326176404953, "learning_rate": 1.9929179329639075e-05, "loss": 0.5387, "step": 607 }, { "epoch": 0.13562346642873077, "grad_norm": 0.17560309171676636, "learning_rate": 1.9928899461948407e-05, "loss": 0.5444, "step": 608 }, { "epoch": 0.1358465313406201, "grad_norm": 0.18911704421043396, "learning_rate": 1.9928619044332837e-05, "loss": 0.506, "step": 609 }, { "epoch": 0.13606959625250947, "grad_norm": 0.19542188942432404, "learning_rate": 1.9928338076807888e-05, "loss": 0.5222, "step": 610 }, { "epoch": 0.13629266116439884, "grad_norm": 0.16921466588974, "learning_rate": 1.9928056559389123e-05, "loss": 0.5095, "step": 611 }, { "epoch": 0.1365157260762882, "grad_norm": 0.19137442111968994, "learning_rate": 1.9927774492092137e-05, "loss": 0.5504, "step": 612 }, { "epoch": 0.13673879098817757, "grad_norm": 0.2050313800573349, "learning_rate": 1.9927491874932553e-05, "loss": 0.5695, "step": 613 }, { "epoch": 0.13696185590006693, "grad_norm": 0.18225258588790894, "learning_rate": 1.992720870792602e-05, "loss": 0.5286, "step": 614 }, { "epoch": 0.13718492081195627, "grad_norm": 0.18813620507717133, "learning_rate": 1.9926924991088226e-05, "loss": 0.5362, "step": 615 }, { "epoch": 0.13740798572384563, "grad_norm": 0.21695192158222198, "learning_rate": 1.9926640724434882e-05, "loss": 0.5548, "step": 616 }, { "epoch": 0.137631050635735, "grad_norm": 0.1780216544866562, "learning_rate": 1.9926355907981735e-05, "loss": 0.547, "step": 617 }, { "epoch": 0.13785411554762436, "grad_norm": 0.182960644364357, "learning_rate": 1.9926070541744557e-05, "loss": 0.524, "step": 618 }, { "epoch": 0.13807718045951373, "grad_norm": 0.18580903112888336, "learning_rate": 1.9925784625739157e-05, "loss": 0.5515, "step": 619 }, { "epoch": 0.1383002453714031, "grad_norm": 0.17631803452968597, "learning_rate": 1.9925498159981368e-05, "loss": 0.5269, "step": 620 }, { "epoch": 0.13852331028329243, "grad_norm": 0.1774953305721283, "learning_rate": 1.9925211144487057e-05, "loss": 0.5279, "step": 621 }, { "epoch": 0.1387463751951818, "grad_norm": 0.1902550458908081, "learning_rate": 1.992492357927212e-05, "loss": 0.5199, "step": 622 }, { "epoch": 0.13896944010707116, "grad_norm": 0.18148526549339294, "learning_rate": 1.9924635464352486e-05, "loss": 0.5273, "step": 623 }, { "epoch": 0.13919250501896052, "grad_norm": 0.18067635595798492, "learning_rate": 1.9924346799744108e-05, "loss": 0.5431, "step": 624 }, { "epoch": 0.1394155699308499, "grad_norm": 0.1752486228942871, "learning_rate": 1.992405758546298e-05, "loss": 0.5012, "step": 625 }, { "epoch": 0.13963863484273925, "grad_norm": 0.18866072595119476, "learning_rate": 1.992376782152512e-05, "loss": 0.5408, "step": 626 }, { "epoch": 0.1398616997546286, "grad_norm": 0.21433256566524506, "learning_rate": 1.9923477507946573e-05, "loss": 0.5625, "step": 627 }, { "epoch": 0.14008476466651795, "grad_norm": 0.17377375066280365, "learning_rate": 1.9923186644743425e-05, "loss": 0.516, "step": 628 }, { "epoch": 0.14030782957840732, "grad_norm": 0.19306115806102753, "learning_rate": 1.9922895231931775e-05, "loss": 0.5397, "step": 629 }, { "epoch": 0.14053089449029668, "grad_norm": 0.18144343793392181, "learning_rate": 1.992260326952777e-05, "loss": 0.5215, "step": 630 }, { "epoch": 0.14075395940218605, "grad_norm": 0.19684460759162903, "learning_rate": 1.9922310757547584e-05, "loss": 0.5472, "step": 631 }, { "epoch": 0.14097702431407538, "grad_norm": 0.18824423849582672, "learning_rate": 1.9922017696007413e-05, "loss": 0.5595, "step": 632 }, { "epoch": 0.14120008922596475, "grad_norm": 0.18633319437503815, "learning_rate": 1.992172408492349e-05, "loss": 0.568, "step": 633 }, { "epoch": 0.1414231541378541, "grad_norm": 0.18547959625720978, "learning_rate": 1.9921429924312074e-05, "loss": 0.5421, "step": 634 }, { "epoch": 0.14164621904974348, "grad_norm": 0.16735389828681946, "learning_rate": 1.9921135214189466e-05, "loss": 0.499, "step": 635 }, { "epoch": 0.14186928396163284, "grad_norm": 0.1912708729505539, "learning_rate": 1.992083995457198e-05, "loss": 0.5655, "step": 636 }, { "epoch": 0.1420923488735222, "grad_norm": 0.19804726541042328, "learning_rate": 1.9920544145475975e-05, "loss": 0.5639, "step": 637 }, { "epoch": 0.14231541378541154, "grad_norm": 0.17582711577415466, "learning_rate": 1.992024778691783e-05, "loss": 0.5577, "step": 638 }, { "epoch": 0.1425384786973009, "grad_norm": 0.18506725132465363, "learning_rate": 1.9919950878913962e-05, "loss": 0.5287, "step": 639 }, { "epoch": 0.14276154360919027, "grad_norm": 0.1914191097021103, "learning_rate": 1.9919653421480816e-05, "loss": 0.5125, "step": 640 }, { "epoch": 0.14298460852107964, "grad_norm": 0.17707392573356628, "learning_rate": 1.9919355414634864e-05, "loss": 0.5351, "step": 641 }, { "epoch": 0.143207673432969, "grad_norm": 0.1675226092338562, "learning_rate": 1.9919056858392618e-05, "loss": 0.5408, "step": 642 }, { "epoch": 0.14343073834485837, "grad_norm": 0.18883782625198364, "learning_rate": 1.9918757752770607e-05, "loss": 0.5387, "step": 643 }, { "epoch": 0.1436538032567477, "grad_norm": 0.18036174774169922, "learning_rate": 1.99184580977854e-05, "loss": 0.5444, "step": 644 }, { "epoch": 0.14387686816863707, "grad_norm": 0.1817016750574112, "learning_rate": 1.99181578934536e-05, "loss": 0.527, "step": 645 }, { "epoch": 0.14409993308052643, "grad_norm": 0.18899092078208923, "learning_rate": 1.991785713979182e-05, "loss": 0.5245, "step": 646 }, { "epoch": 0.1443229979924158, "grad_norm": 0.18643809854984283, "learning_rate": 1.991755583681673e-05, "loss": 0.5247, "step": 647 }, { "epoch": 0.14454606290430516, "grad_norm": 0.1903519332408905, "learning_rate": 1.9917253984545014e-05, "loss": 0.5497, "step": 648 }, { "epoch": 0.14476912781619453, "grad_norm": 0.1695372760295868, "learning_rate": 1.991695158299339e-05, "loss": 0.5535, "step": 649 }, { "epoch": 0.14499219272808386, "grad_norm": 0.19261977076530457, "learning_rate": 1.9916648632178605e-05, "loss": 0.5324, "step": 650 }, { "epoch": 0.14521525763997323, "grad_norm": 0.1795564889907837, "learning_rate": 1.9916345132117442e-05, "loss": 0.5316, "step": 651 }, { "epoch": 0.1454383225518626, "grad_norm": 0.27002382278442383, "learning_rate": 1.9916041082826713e-05, "loss": 0.5521, "step": 652 }, { "epoch": 0.14566138746375196, "grad_norm": 0.17605867981910706, "learning_rate": 1.9915736484323246e-05, "loss": 0.5228, "step": 653 }, { "epoch": 0.14588445237564132, "grad_norm": 0.1808420866727829, "learning_rate": 1.9915431336623928e-05, "loss": 0.5593, "step": 654 }, { "epoch": 0.14610751728753066, "grad_norm": 0.189579576253891, "learning_rate": 1.991512563974565e-05, "loss": 0.5405, "step": 655 }, { "epoch": 0.14633058219942002, "grad_norm": 0.1883007138967514, "learning_rate": 1.9914819393705342e-05, "loss": 0.5571, "step": 656 }, { "epoch": 0.1465536471113094, "grad_norm": 0.18864606320858002, "learning_rate": 1.9914512598519972e-05, "loss": 0.519, "step": 657 }, { "epoch": 0.14677671202319875, "grad_norm": 0.17059919238090515, "learning_rate": 1.9914205254206527e-05, "loss": 0.533, "step": 658 }, { "epoch": 0.14699977693508812, "grad_norm": 0.4270278215408325, "learning_rate": 1.9913897360782036e-05, "loss": 0.5282, "step": 659 }, { "epoch": 0.14722284184697748, "grad_norm": 0.18411633372306824, "learning_rate": 1.9913588918263545e-05, "loss": 0.5128, "step": 660 }, { "epoch": 0.14744590675886682, "grad_norm": 0.2003640979528427, "learning_rate": 1.9913279926668146e-05, "loss": 0.5213, "step": 661 }, { "epoch": 0.14766897167075618, "grad_norm": 0.18532606959342957, "learning_rate": 1.9912970386012943e-05, "loss": 0.498, "step": 662 }, { "epoch": 0.14789203658264555, "grad_norm": 0.1674346923828125, "learning_rate": 1.9912660296315083e-05, "loss": 0.5383, "step": 663 }, { "epoch": 0.1481151014945349, "grad_norm": 0.19120754301548004, "learning_rate": 1.9912349657591748e-05, "loss": 0.5301, "step": 664 }, { "epoch": 0.14833816640642428, "grad_norm": 0.178116112947464, "learning_rate": 1.9912038469860135e-05, "loss": 0.5583, "step": 665 }, { "epoch": 0.14856123131831364, "grad_norm": 0.1826133131980896, "learning_rate": 1.9911726733137484e-05, "loss": 0.5188, "step": 666 }, { "epoch": 0.14878429623020298, "grad_norm": 0.18221695721149445, "learning_rate": 1.991141444744106e-05, "loss": 0.5236, "step": 667 }, { "epoch": 0.14900736114209234, "grad_norm": 0.18043336272239685, "learning_rate": 1.9911101612788157e-05, "loss": 0.5201, "step": 668 }, { "epoch": 0.1492304260539817, "grad_norm": 0.19176125526428223, "learning_rate": 1.9910788229196104e-05, "loss": 0.5501, "step": 669 }, { "epoch": 0.14945349096587107, "grad_norm": 0.18294784426689148, "learning_rate": 1.9910474296682256e-05, "loss": 0.5304, "step": 670 }, { "epoch": 0.14967655587776044, "grad_norm": 0.19021780788898468, "learning_rate": 1.9910159815264e-05, "loss": 0.5561, "step": 671 }, { "epoch": 0.1498996207896498, "grad_norm": 0.18545284867286682, "learning_rate": 1.9909844784958762e-05, "loss": 0.536, "step": 672 }, { "epoch": 0.15012268570153914, "grad_norm": 0.2078470140695572, "learning_rate": 1.990952920578398e-05, "loss": 0.513, "step": 673 }, { "epoch": 0.1503457506134285, "grad_norm": 0.20093394815921783, "learning_rate": 1.9909213077757138e-05, "loss": 0.5406, "step": 674 }, { "epoch": 0.15056881552531787, "grad_norm": 0.212370365858078, "learning_rate": 1.9908896400895745e-05, "loss": 0.5108, "step": 675 }, { "epoch": 0.15079188043720723, "grad_norm": 0.1903519630432129, "learning_rate": 1.990857917521734e-05, "loss": 0.5163, "step": 676 }, { "epoch": 0.1510149453490966, "grad_norm": 0.18388496339321136, "learning_rate": 1.9908261400739494e-05, "loss": 0.5361, "step": 677 }, { "epoch": 0.15123801026098593, "grad_norm": 0.1817573606967926, "learning_rate": 1.9907943077479802e-05, "loss": 0.5582, "step": 678 }, { "epoch": 0.1514610751728753, "grad_norm": 0.19696584343910217, "learning_rate": 1.9907624205455903e-05, "loss": 0.5326, "step": 679 }, { "epoch": 0.15168414008476466, "grad_norm": 0.17569434642791748, "learning_rate": 1.990730478468545e-05, "loss": 0.5073, "step": 680 }, { "epoch": 0.15190720499665403, "grad_norm": 0.1812848299741745, "learning_rate": 1.9906984815186142e-05, "loss": 0.539, "step": 681 }, { "epoch": 0.1521302699085434, "grad_norm": 0.1694687008857727, "learning_rate": 1.9906664296975696e-05, "loss": 0.5286, "step": 682 }, { "epoch": 0.15235333482043276, "grad_norm": 0.18631073832511902, "learning_rate": 1.990634323007187e-05, "loss": 0.5807, "step": 683 }, { "epoch": 0.1525763997323221, "grad_norm": 0.18517658114433289, "learning_rate": 1.9906021614492438e-05, "loss": 0.5393, "step": 684 }, { "epoch": 0.15279946464421146, "grad_norm": 0.19088414311408997, "learning_rate": 1.990569945025522e-05, "loss": 0.5583, "step": 685 }, { "epoch": 0.15302252955610082, "grad_norm": 0.1954737901687622, "learning_rate": 1.9905376737378056e-05, "loss": 0.5348, "step": 686 }, { "epoch": 0.1532455944679902, "grad_norm": 0.1780342310667038, "learning_rate": 1.990505347587882e-05, "loss": 0.5083, "step": 687 }, { "epoch": 0.15346865937987955, "grad_norm": 0.18818046152591705, "learning_rate": 1.9904729665775417e-05, "loss": 0.51, "step": 688 }, { "epoch": 0.15369172429176892, "grad_norm": 0.1797480434179306, "learning_rate": 1.990440530708578e-05, "loss": 0.5114, "step": 689 }, { "epoch": 0.15391478920365825, "grad_norm": 0.18315070867538452, "learning_rate": 1.9904080399827883e-05, "loss": 0.5322, "step": 690 }, { "epoch": 0.15413785411554762, "grad_norm": 0.2024601846933365, "learning_rate": 1.990375494401971e-05, "loss": 0.5198, "step": 691 }, { "epoch": 0.15436091902743698, "grad_norm": 0.2151622325181961, "learning_rate": 1.990342893967929e-05, "loss": 0.5308, "step": 692 }, { "epoch": 0.15458398393932635, "grad_norm": 0.1977054476737976, "learning_rate": 1.990310238682468e-05, "loss": 0.579, "step": 693 }, { "epoch": 0.1548070488512157, "grad_norm": 0.1899496465921402, "learning_rate": 1.990277528547397e-05, "loss": 0.5435, "step": 694 }, { "epoch": 0.15503011376310508, "grad_norm": 0.18425041437149048, "learning_rate": 1.9902447635645273e-05, "loss": 0.5582, "step": 695 }, { "epoch": 0.1552531786749944, "grad_norm": 0.1868348866701126, "learning_rate": 1.9902119437356737e-05, "loss": 0.5208, "step": 696 }, { "epoch": 0.15547624358688378, "grad_norm": 0.19318504631519318, "learning_rate": 1.990179069062654e-05, "loss": 0.581, "step": 697 }, { "epoch": 0.15569930849877314, "grad_norm": 0.17516325414180756, "learning_rate": 1.990146139547289e-05, "loss": 0.522, "step": 698 }, { "epoch": 0.1559223734106625, "grad_norm": 0.18644174933433533, "learning_rate": 1.990113155191402e-05, "loss": 0.5361, "step": 699 }, { "epoch": 0.15614543832255187, "grad_norm": 0.204257071018219, "learning_rate": 1.9900801159968207e-05, "loss": 0.5216, "step": 700 }, { "epoch": 0.15636850323444124, "grad_norm": 0.19151191413402557, "learning_rate": 1.990047021965375e-05, "loss": 0.5476, "step": 701 }, { "epoch": 0.15659156814633057, "grad_norm": 0.18692530691623688, "learning_rate": 1.9900138730988976e-05, "loss": 0.5516, "step": 702 }, { "epoch": 0.15681463305821994, "grad_norm": 0.19439953565597534, "learning_rate": 1.9899806693992242e-05, "loss": 0.5404, "step": 703 }, { "epoch": 0.1570376979701093, "grad_norm": 0.1809748411178589, "learning_rate": 1.989947410868194e-05, "loss": 0.5571, "step": 704 }, { "epoch": 0.15726076288199867, "grad_norm": 0.19498126208782196, "learning_rate": 1.9899140975076495e-05, "loss": 0.5121, "step": 705 }, { "epoch": 0.15748382779388803, "grad_norm": 0.19838570058345795, "learning_rate": 1.9898807293194352e-05, "loss": 0.5535, "step": 706 }, { "epoch": 0.15770689270577737, "grad_norm": 0.18156638741493225, "learning_rate": 1.9898473063054e-05, "loss": 0.5275, "step": 707 }, { "epoch": 0.15792995761766673, "grad_norm": 0.1888674944639206, "learning_rate": 1.989813828467394e-05, "loss": 0.5274, "step": 708 }, { "epoch": 0.1581530225295561, "grad_norm": 0.2016175538301468, "learning_rate": 1.9897802958072722e-05, "loss": 0.5057, "step": 709 }, { "epoch": 0.15837608744144546, "grad_norm": 0.21129021048545837, "learning_rate": 1.989746708326892e-05, "loss": 0.5691, "step": 710 }, { "epoch": 0.15859915235333483, "grad_norm": 0.20351482927799225, "learning_rate": 1.9897130660281127e-05, "loss": 0.5513, "step": 711 }, { "epoch": 0.1588222172652242, "grad_norm": 0.2297467589378357, "learning_rate": 1.9896793689127988e-05, "loss": 0.5618, "step": 712 }, { "epoch": 0.15904528217711353, "grad_norm": 0.222783625125885, "learning_rate": 1.989645616982816e-05, "loss": 0.5336, "step": 713 }, { "epoch": 0.1592683470890029, "grad_norm": 0.1896630972623825, "learning_rate": 1.9896118102400334e-05, "loss": 0.5562, "step": 714 }, { "epoch": 0.15949141200089226, "grad_norm": 0.188043013215065, "learning_rate": 1.989577948686324e-05, "loss": 0.497, "step": 715 }, { "epoch": 0.15971447691278162, "grad_norm": 0.21135394275188446, "learning_rate": 1.9895440323235635e-05, "loss": 0.5714, "step": 716 }, { "epoch": 0.159937541824671, "grad_norm": 0.1962571144104004, "learning_rate": 1.98951006115363e-05, "loss": 0.5686, "step": 717 }, { "epoch": 0.16016060673656035, "grad_norm": 0.196267768740654, "learning_rate": 1.9894760351784047e-05, "loss": 0.557, "step": 718 }, { "epoch": 0.1603836716484497, "grad_norm": 0.1771324872970581, "learning_rate": 1.9894419543997724e-05, "loss": 0.5492, "step": 719 }, { "epoch": 0.16060673656033905, "grad_norm": 0.17055559158325195, "learning_rate": 1.9894078188196213e-05, "loss": 0.5311, "step": 720 }, { "epoch": 0.16082980147222842, "grad_norm": 0.18929804861545563, "learning_rate": 1.9893736284398414e-05, "loss": 0.5388, "step": 721 }, { "epoch": 0.16105286638411778, "grad_norm": 0.1930275857448578, "learning_rate": 1.9893393832623266e-05, "loss": 0.554, "step": 722 }, { "epoch": 0.16127593129600715, "grad_norm": 0.1900511384010315, "learning_rate": 1.9893050832889734e-05, "loss": 0.5446, "step": 723 }, { "epoch": 0.1614989962078965, "grad_norm": 0.17943377792835236, "learning_rate": 1.9892707285216816e-05, "loss": 0.541, "step": 724 }, { "epoch": 0.16172206111978585, "grad_norm": 0.18837152421474457, "learning_rate": 1.9892363189623546e-05, "loss": 0.5424, "step": 725 }, { "epoch": 0.1619451260316752, "grad_norm": 0.1987512707710266, "learning_rate": 1.989201854612897e-05, "loss": 0.5412, "step": 726 }, { "epoch": 0.16216819094356458, "grad_norm": 0.1869790405035019, "learning_rate": 1.9891673354752192e-05, "loss": 0.5139, "step": 727 }, { "epoch": 0.16239125585545394, "grad_norm": 0.17572949826717377, "learning_rate": 1.9891327615512315e-05, "loss": 0.5137, "step": 728 }, { "epoch": 0.1626143207673433, "grad_norm": 0.17914965748786926, "learning_rate": 1.9890981328428502e-05, "loss": 0.5416, "step": 729 }, { "epoch": 0.16283738567923264, "grad_norm": 0.17955482006072998, "learning_rate": 1.989063449351992e-05, "loss": 0.5307, "step": 730 }, { "epoch": 0.163060450591122, "grad_norm": 0.17228074371814728, "learning_rate": 1.9890287110805787e-05, "loss": 0.5179, "step": 731 }, { "epoch": 0.16328351550301137, "grad_norm": 0.19471096992492676, "learning_rate": 1.9889939180305343e-05, "loss": 0.5787, "step": 732 }, { "epoch": 0.16350658041490074, "grad_norm": 0.19749003648757935, "learning_rate": 1.9889590702037857e-05, "loss": 0.5369, "step": 733 }, { "epoch": 0.1637296453267901, "grad_norm": 0.17162089049816132, "learning_rate": 1.9889241676022628e-05, "loss": 0.5426, "step": 734 }, { "epoch": 0.16395271023867947, "grad_norm": 0.18129977583885193, "learning_rate": 1.988889210227899e-05, "loss": 0.5297, "step": 735 }, { "epoch": 0.1641757751505688, "grad_norm": 0.18625618517398834, "learning_rate": 1.9888541980826307e-05, "loss": 0.5169, "step": 736 }, { "epoch": 0.16439884006245817, "grad_norm": 0.21191106736660004, "learning_rate": 1.9888191311683966e-05, "loss": 0.5322, "step": 737 }, { "epoch": 0.16462190497434753, "grad_norm": 0.4791422486305237, "learning_rate": 1.988784009487139e-05, "loss": 0.5285, "step": 738 }, { "epoch": 0.1648449698862369, "grad_norm": 0.18190808594226837, "learning_rate": 1.9887488330408033e-05, "loss": 0.5627, "step": 739 }, { "epoch": 0.16506803479812626, "grad_norm": 0.17037086188793182, "learning_rate": 1.9887136018313374e-05, "loss": 0.5329, "step": 740 }, { "epoch": 0.16529109971001563, "grad_norm": 0.18794186413288116, "learning_rate": 1.9886783158606934e-05, "loss": 0.5469, "step": 741 }, { "epoch": 0.16551416462190496, "grad_norm": 0.19635480642318726, "learning_rate": 1.9886429751308252e-05, "loss": 0.5707, "step": 742 }, { "epoch": 0.16573722953379433, "grad_norm": 0.1907324194908142, "learning_rate": 1.9886075796436902e-05, "loss": 0.5676, "step": 743 }, { "epoch": 0.1659602944456837, "grad_norm": 0.1943301409482956, "learning_rate": 1.9885721294012487e-05, "loss": 0.5653, "step": 744 }, { "epoch": 0.16618335935757306, "grad_norm": 0.19706295430660248, "learning_rate": 1.9885366244054646e-05, "loss": 0.5258, "step": 745 }, { "epoch": 0.16640642426946242, "grad_norm": 0.2110588550567627, "learning_rate": 1.9885010646583038e-05, "loss": 0.5509, "step": 746 }, { "epoch": 0.1666294891813518, "grad_norm": 0.18079644441604614, "learning_rate": 1.988465450161736e-05, "loss": 0.5299, "step": 747 }, { "epoch": 0.16685255409324112, "grad_norm": 0.2338915318250656, "learning_rate": 1.988429780917734e-05, "loss": 0.5637, "step": 748 }, { "epoch": 0.1670756190051305, "grad_norm": 0.18777087330818176, "learning_rate": 1.9883940569282737e-05, "loss": 0.5502, "step": 749 }, { "epoch": 0.16729868391701985, "grad_norm": 0.17644049227237701, "learning_rate": 1.988358278195333e-05, "loss": 0.5477, "step": 750 }, { "epoch": 0.16752174882890922, "grad_norm": 0.1912970095872879, "learning_rate": 1.9883224447208936e-05, "loss": 0.5447, "step": 751 }, { "epoch": 0.16774481374079858, "grad_norm": 0.1746956706047058, "learning_rate": 1.9882865565069408e-05, "loss": 0.5425, "step": 752 }, { "epoch": 0.16796787865268792, "grad_norm": 0.18779419362545013, "learning_rate": 1.9882506135554614e-05, "loss": 0.5329, "step": 753 }, { "epoch": 0.16819094356457728, "grad_norm": 0.21110400557518005, "learning_rate": 1.9882146158684473e-05, "loss": 0.5421, "step": 754 }, { "epoch": 0.16841400847646665, "grad_norm": 0.17395779490470886, "learning_rate": 1.9881785634478915e-05, "loss": 0.5321, "step": 755 }, { "epoch": 0.168637073388356, "grad_norm": 0.1749841570854187, "learning_rate": 1.988142456295791e-05, "loss": 0.522, "step": 756 }, { "epoch": 0.16886013830024538, "grad_norm": 0.17398926615715027, "learning_rate": 1.988106294414145e-05, "loss": 0.5563, "step": 757 }, { "epoch": 0.16908320321213474, "grad_norm": 0.1813582479953766, "learning_rate": 1.9880700778049575e-05, "loss": 0.5242, "step": 758 }, { "epoch": 0.16930626812402408, "grad_norm": 0.18003934621810913, "learning_rate": 1.9880338064702337e-05, "loss": 0.5468, "step": 759 }, { "epoch": 0.16952933303591344, "grad_norm": 0.17160138487815857, "learning_rate": 1.9879974804119827e-05, "loss": 0.5321, "step": 760 }, { "epoch": 0.1697523979478028, "grad_norm": 0.19145694375038147, "learning_rate": 1.9879610996322168e-05, "loss": 0.5365, "step": 761 }, { "epoch": 0.16997546285969217, "grad_norm": 0.18664413690567017, "learning_rate": 1.9879246641329505e-05, "loss": 0.5278, "step": 762 }, { "epoch": 0.17019852777158154, "grad_norm": 0.18776154518127441, "learning_rate": 1.987888173916202e-05, "loss": 0.5639, "step": 763 }, { "epoch": 0.1704215926834709, "grad_norm": 0.17712879180908203, "learning_rate": 1.9878516289839923e-05, "loss": 0.5119, "step": 764 }, { "epoch": 0.17064465759536024, "grad_norm": 0.1806483119726181, "learning_rate": 1.9878150293383457e-05, "loss": 0.5507, "step": 765 }, { "epoch": 0.1708677225072496, "grad_norm": 0.18238821625709534, "learning_rate": 1.9877783749812892e-05, "loss": 0.5581, "step": 766 }, { "epoch": 0.17109078741913897, "grad_norm": 0.17094913125038147, "learning_rate": 1.9877416659148525e-05, "loss": 0.5502, "step": 767 }, { "epoch": 0.17131385233102833, "grad_norm": 0.18648453056812286, "learning_rate": 1.9877049021410696e-05, "loss": 0.5568, "step": 768 }, { "epoch": 0.1715369172429177, "grad_norm": 0.20364215970039368, "learning_rate": 1.9876680836619762e-05, "loss": 0.5233, "step": 769 }, { "epoch": 0.17175998215480706, "grad_norm": 0.17506776750087738, "learning_rate": 1.9876312104796117e-05, "loss": 0.5479, "step": 770 }, { "epoch": 0.1719830470666964, "grad_norm": 0.17632229626178741, "learning_rate": 1.9875942825960183e-05, "loss": 0.5241, "step": 771 }, { "epoch": 0.17220611197858576, "grad_norm": 0.18557725846767426, "learning_rate": 1.9875573000132414e-05, "loss": 0.5794, "step": 772 }, { "epoch": 0.17242917689047513, "grad_norm": 0.1729423552751541, "learning_rate": 1.987520262733329e-05, "loss": 0.5247, "step": 773 }, { "epoch": 0.1726522418023645, "grad_norm": 0.18643899261951447, "learning_rate": 1.9874831707583328e-05, "loss": 0.5305, "step": 774 }, { "epoch": 0.17287530671425386, "grad_norm": 0.17992724478244781, "learning_rate": 1.987446024090307e-05, "loss": 0.5548, "step": 775 }, { "epoch": 0.17309837162614322, "grad_norm": 0.19330288469791412, "learning_rate": 1.9874088227313093e-05, "loss": 0.5151, "step": 776 }, { "epoch": 0.17332143653803256, "grad_norm": 0.18407322466373444, "learning_rate": 1.9873715666834e-05, "loss": 0.5262, "step": 777 }, { "epoch": 0.17354450144992192, "grad_norm": 0.17531730234622955, "learning_rate": 1.987334255948642e-05, "loss": 0.51, "step": 778 }, { "epoch": 0.1737675663618113, "grad_norm": 0.1791767179965973, "learning_rate": 1.987296890529103e-05, "loss": 0.5217, "step": 779 }, { "epoch": 0.17399063127370065, "grad_norm": 0.17693190276622772, "learning_rate": 1.9872594704268516e-05, "loss": 0.5346, "step": 780 }, { "epoch": 0.17421369618559002, "grad_norm": 0.17644071578979492, "learning_rate": 1.9872219956439607e-05, "loss": 0.5335, "step": 781 }, { "epoch": 0.17443676109747935, "grad_norm": 0.17572622001171112, "learning_rate": 1.987184466182506e-05, "loss": 0.5302, "step": 782 }, { "epoch": 0.17465982600936872, "grad_norm": 0.18111130595207214, "learning_rate": 1.987146882044565e-05, "loss": 0.5205, "step": 783 }, { "epoch": 0.17488289092125808, "grad_norm": 0.17094580829143524, "learning_rate": 1.987109243232221e-05, "loss": 0.527, "step": 784 }, { "epoch": 0.17510595583314745, "grad_norm": 0.1917412430047989, "learning_rate": 1.9870715497475583e-05, "loss": 0.5289, "step": 785 }, { "epoch": 0.1753290207450368, "grad_norm": 0.19271767139434814, "learning_rate": 1.9870338015926634e-05, "loss": 0.5123, "step": 786 }, { "epoch": 0.17555208565692618, "grad_norm": 0.19731736183166504, "learning_rate": 1.9869959987696282e-05, "loss": 0.543, "step": 787 }, { "epoch": 0.1757751505688155, "grad_norm": 0.172173410654068, "learning_rate": 1.9869581412805462e-05, "loss": 0.5211, "step": 788 }, { "epoch": 0.17599821548070488, "grad_norm": 0.1778416931629181, "learning_rate": 1.9869202291275144e-05, "loss": 0.5168, "step": 789 }, { "epoch": 0.17622128039259424, "grad_norm": 0.1906319111585617, "learning_rate": 1.986882262312632e-05, "loss": 0.5502, "step": 790 }, { "epoch": 0.1764443453044836, "grad_norm": 0.18600904941558838, "learning_rate": 1.986844240838002e-05, "loss": 0.5093, "step": 791 }, { "epoch": 0.17666741021637297, "grad_norm": 0.17453651130199432, "learning_rate": 1.986806164705731e-05, "loss": 0.5316, "step": 792 }, { "epoch": 0.17689047512826234, "grad_norm": 0.18647123873233795, "learning_rate": 1.9867680339179268e-05, "loss": 0.5293, "step": 793 }, { "epoch": 0.17711354004015167, "grad_norm": 0.18260005116462708, "learning_rate": 1.9867298484767022e-05, "loss": 0.5429, "step": 794 }, { "epoch": 0.17733660495204104, "grad_norm": 0.1722402721643448, "learning_rate": 1.9866916083841715e-05, "loss": 0.5211, "step": 795 }, { "epoch": 0.1775596698639304, "grad_norm": 0.16579583287239075, "learning_rate": 1.9866533136424537e-05, "loss": 0.5173, "step": 796 }, { "epoch": 0.17778273477581977, "grad_norm": 0.18849937617778778, "learning_rate": 1.9866149642536683e-05, "loss": 0.5482, "step": 797 }, { "epoch": 0.17800579968770913, "grad_norm": 0.1786874532699585, "learning_rate": 1.98657656021994e-05, "loss": 0.5203, "step": 798 }, { "epoch": 0.1782288645995985, "grad_norm": 0.1848941594362259, "learning_rate": 1.986538101543397e-05, "loss": 0.5248, "step": 799 }, { "epoch": 0.17845192951148783, "grad_norm": 0.1833941787481308, "learning_rate": 1.9864995882261674e-05, "loss": 0.5192, "step": 800 }, { "epoch": 0.1786749944233772, "grad_norm": 0.1813460886478424, "learning_rate": 1.9864610202703858e-05, "loss": 0.5307, "step": 801 }, { "epoch": 0.17889805933526656, "grad_norm": 0.18154440820217133, "learning_rate": 1.9864223976781876e-05, "loss": 0.5468, "step": 802 }, { "epoch": 0.17912112424715593, "grad_norm": 0.18413223326206207, "learning_rate": 1.9863837204517124e-05, "loss": 0.5376, "step": 803 }, { "epoch": 0.1793441891590453, "grad_norm": 0.17483587563037872, "learning_rate": 1.986344988593102e-05, "loss": 0.5333, "step": 804 }, { "epoch": 0.17956725407093463, "grad_norm": 0.17205438017845154, "learning_rate": 1.9863062021045017e-05, "loss": 0.4933, "step": 805 }, { "epoch": 0.179790318982824, "grad_norm": 0.17578768730163574, "learning_rate": 1.98626736098806e-05, "loss": 0.5153, "step": 806 }, { "epoch": 0.18001338389471336, "grad_norm": 0.1773952841758728, "learning_rate": 1.9862284652459275e-05, "loss": 0.5265, "step": 807 }, { "epoch": 0.18023644880660272, "grad_norm": 0.17616188526153564, "learning_rate": 1.9861895148802594e-05, "loss": 0.5438, "step": 808 }, { "epoch": 0.1804595137184921, "grad_norm": 0.1791033297777176, "learning_rate": 1.9861505098932127e-05, "loss": 0.5294, "step": 809 }, { "epoch": 0.18068257863038145, "grad_norm": 0.18261539936065674, "learning_rate": 1.986111450286947e-05, "loss": 0.548, "step": 810 }, { "epoch": 0.1809056435422708, "grad_norm": 0.1767009198665619, "learning_rate": 1.986072336063627e-05, "loss": 0.5278, "step": 811 }, { "epoch": 0.18112870845416015, "grad_norm": 0.19362005591392517, "learning_rate": 1.9860331672254182e-05, "loss": 0.5206, "step": 812 }, { "epoch": 0.18135177336604952, "grad_norm": 0.185451477766037, "learning_rate": 1.98599394377449e-05, "loss": 0.5513, "step": 813 }, { "epoch": 0.18157483827793888, "grad_norm": 0.17266923189163208, "learning_rate": 1.985954665713015e-05, "loss": 0.5648, "step": 814 }, { "epoch": 0.18179790318982825, "grad_norm": 0.170868918299675, "learning_rate": 1.9859153330431692e-05, "loss": 0.5343, "step": 815 }, { "epoch": 0.1820209681017176, "grad_norm": 0.17769230902194977, "learning_rate": 1.98587594576713e-05, "loss": 0.5539, "step": 816 }, { "epoch": 0.18224403301360695, "grad_norm": 0.16811443865299225, "learning_rate": 1.9858365038870803e-05, "loss": 0.5209, "step": 817 }, { "epoch": 0.1824670979254963, "grad_norm": 0.1751745492219925, "learning_rate": 1.985797007405203e-05, "loss": 0.5509, "step": 818 }, { "epoch": 0.18269016283738568, "grad_norm": 0.1694861203432083, "learning_rate": 1.985757456323687e-05, "loss": 0.5505, "step": 819 }, { "epoch": 0.18291322774927504, "grad_norm": 0.1734897792339325, "learning_rate": 1.985717850644722e-05, "loss": 0.5022, "step": 820 }, { "epoch": 0.1831362926611644, "grad_norm": 0.18104127049446106, "learning_rate": 1.9856781903705026e-05, "loss": 0.5434, "step": 821 }, { "epoch": 0.18335935757305377, "grad_norm": 0.17157487571239471, "learning_rate": 1.9856384755032245e-05, "loss": 0.5356, "step": 822 }, { "epoch": 0.1835824224849431, "grad_norm": 0.1781257838010788, "learning_rate": 1.985598706045088e-05, "loss": 0.522, "step": 823 }, { "epoch": 0.18380548739683247, "grad_norm": 0.16910432279109955, "learning_rate": 1.985558881998295e-05, "loss": 0.5324, "step": 824 }, { "epoch": 0.18402855230872184, "grad_norm": 0.180936798453331, "learning_rate": 1.985519003365052e-05, "loss": 0.5307, "step": 825 }, { "epoch": 0.1842516172206112, "grad_norm": 0.1872517466545105, "learning_rate": 1.9854790701475676e-05, "loss": 0.5667, "step": 826 }, { "epoch": 0.18447468213250057, "grad_norm": 0.16592343151569366, "learning_rate": 1.985439082348053e-05, "loss": 0.4938, "step": 827 }, { "epoch": 0.1846977470443899, "grad_norm": 0.18184059858322144, "learning_rate": 1.9853990399687237e-05, "loss": 0.536, "step": 828 }, { "epoch": 0.18492081195627927, "grad_norm": 0.1687706708908081, "learning_rate": 1.985358943011797e-05, "loss": 0.4902, "step": 829 }, { "epoch": 0.18514387686816863, "grad_norm": 0.18062466382980347, "learning_rate": 1.985318791479494e-05, "loss": 0.5235, "step": 830 }, { "epoch": 0.185366941780058, "grad_norm": 0.1865355372428894, "learning_rate": 1.985278585374038e-05, "loss": 0.5439, "step": 831 }, { "epoch": 0.18559000669194736, "grad_norm": 0.21087662875652313, "learning_rate": 1.985238324697657e-05, "loss": 0.5338, "step": 832 }, { "epoch": 0.18581307160383673, "grad_norm": 0.18593360483646393, "learning_rate": 1.9851980094525795e-05, "loss": 0.5139, "step": 833 }, { "epoch": 0.18603613651572606, "grad_norm": 0.17105735838413239, "learning_rate": 1.9851576396410395e-05, "loss": 0.5385, "step": 834 }, { "epoch": 0.18625920142761543, "grad_norm": 0.1764969825744629, "learning_rate": 1.9851172152652722e-05, "loss": 0.5381, "step": 835 }, { "epoch": 0.1864822663395048, "grad_norm": 0.211869478225708, "learning_rate": 1.985076736327517e-05, "loss": 0.512, "step": 836 }, { "epoch": 0.18670533125139416, "grad_norm": 0.17822885513305664, "learning_rate": 1.9850362028300162e-05, "loss": 0.5394, "step": 837 }, { "epoch": 0.18692839616328352, "grad_norm": 0.17853499948978424, "learning_rate": 1.9849956147750137e-05, "loss": 0.5273, "step": 838 }, { "epoch": 0.1871514610751729, "grad_norm": 0.1717289835214615, "learning_rate": 1.9849549721647586e-05, "loss": 0.5126, "step": 839 }, { "epoch": 0.18737452598706222, "grad_norm": 0.167042076587677, "learning_rate": 1.9849142750015014e-05, "loss": 0.5154, "step": 840 }, { "epoch": 0.1875975908989516, "grad_norm": 0.1893247812986374, "learning_rate": 1.9848735232874966e-05, "loss": 0.5205, "step": 841 }, { "epoch": 0.18782065581084095, "grad_norm": 0.18898561596870422, "learning_rate": 1.984832717025001e-05, "loss": 0.5513, "step": 842 }, { "epoch": 0.18804372072273032, "grad_norm": 0.18172813951969147, "learning_rate": 1.984791856216274e-05, "loss": 0.5056, "step": 843 }, { "epoch": 0.18826678563461968, "grad_norm": 0.1696682870388031, "learning_rate": 1.98475094086358e-05, "loss": 0.5313, "step": 844 }, { "epoch": 0.18848985054650905, "grad_norm": 0.1731012910604477, "learning_rate": 1.9847099709691843e-05, "loss": 0.5108, "step": 845 }, { "epoch": 0.18871291545839838, "grad_norm": 0.18047916889190674, "learning_rate": 1.9846689465353563e-05, "loss": 0.5108, "step": 846 }, { "epoch": 0.18893598037028775, "grad_norm": 0.17469045519828796, "learning_rate": 1.9846278675643684e-05, "loss": 0.5273, "step": 847 }, { "epoch": 0.1891590452821771, "grad_norm": 0.18713539838790894, "learning_rate": 1.9845867340584957e-05, "loss": 0.5587, "step": 848 }, { "epoch": 0.18938211019406648, "grad_norm": 0.2040695995092392, "learning_rate": 1.984545546020016e-05, "loss": 0.5651, "step": 849 }, { "epoch": 0.18960517510595584, "grad_norm": 0.24084708094596863, "learning_rate": 1.984504303451211e-05, "loss": 0.5186, "step": 850 }, { "epoch": 0.1898282400178452, "grad_norm": 0.18628886342048645, "learning_rate": 1.9844630063543655e-05, "loss": 0.5091, "step": 851 }, { "epoch": 0.19005130492973454, "grad_norm": 0.18985594809055328, "learning_rate": 1.9844216547317656e-05, "loss": 0.5457, "step": 852 }, { "epoch": 0.1902743698416239, "grad_norm": 0.1807604879140854, "learning_rate": 1.9843802485857028e-05, "loss": 0.5137, "step": 853 }, { "epoch": 0.19049743475351327, "grad_norm": 0.1864684522151947, "learning_rate": 1.984338787918469e-05, "loss": 0.5232, "step": 854 }, { "epoch": 0.19072049966540264, "grad_norm": 0.17398470640182495, "learning_rate": 1.984297272732362e-05, "loss": 0.5207, "step": 855 }, { "epoch": 0.190943564577292, "grad_norm": 0.21071763336658478, "learning_rate": 1.9842557030296804e-05, "loss": 0.5032, "step": 856 }, { "epoch": 0.19116662948918134, "grad_norm": 0.17372627556324005, "learning_rate": 1.9842140788127264e-05, "loss": 0.4992, "step": 857 }, { "epoch": 0.1913896944010707, "grad_norm": 0.18683859705924988, "learning_rate": 1.9841724000838064e-05, "loss": 0.5342, "step": 858 }, { "epoch": 0.19161275931296007, "grad_norm": 0.18670214712619781, "learning_rate": 1.9841306668452275e-05, "loss": 0.521, "step": 859 }, { "epoch": 0.19183582422484943, "grad_norm": 0.18776099383831024, "learning_rate": 1.9840888790993023e-05, "loss": 0.5164, "step": 860 }, { "epoch": 0.1920588891367388, "grad_norm": 0.20680485665798187, "learning_rate": 1.9840470368483448e-05, "loss": 0.5433, "step": 861 }, { "epoch": 0.19228195404862816, "grad_norm": 0.18562763929367065, "learning_rate": 1.9840051400946724e-05, "loss": 0.5225, "step": 862 }, { "epoch": 0.1925050189605175, "grad_norm": 0.1958203911781311, "learning_rate": 1.9839631888406055e-05, "loss": 0.5333, "step": 863 }, { "epoch": 0.19272808387240686, "grad_norm": 0.1797972470521927, "learning_rate": 1.9839211830884682e-05, "loss": 0.5259, "step": 864 }, { "epoch": 0.19295114878429623, "grad_norm": 0.18006913363933563, "learning_rate": 1.9838791228405866e-05, "loss": 0.5355, "step": 865 }, { "epoch": 0.1931742136961856, "grad_norm": 0.19286584854125977, "learning_rate": 1.9838370080992902e-05, "loss": 0.5548, "step": 866 }, { "epoch": 0.19339727860807496, "grad_norm": 0.17611972987651825, "learning_rate": 1.9837948388669118e-05, "loss": 0.4975, "step": 867 }, { "epoch": 0.19362034351996432, "grad_norm": 0.17204061150550842, "learning_rate": 1.983752615145787e-05, "loss": 0.5353, "step": 868 }, { "epoch": 0.19384340843185366, "grad_norm": 0.17773252725601196, "learning_rate": 1.9837103369382542e-05, "loss": 0.5621, "step": 869 }, { "epoch": 0.19406647334374302, "grad_norm": 0.17241743206977844, "learning_rate": 1.983668004246655e-05, "loss": 0.5514, "step": 870 }, { "epoch": 0.1942895382556324, "grad_norm": 0.1765352487564087, "learning_rate": 1.9836256170733343e-05, "loss": 0.5262, "step": 871 }, { "epoch": 0.19451260316752175, "grad_norm": 0.18487049639225006, "learning_rate": 1.98358317542064e-05, "loss": 0.4941, "step": 872 }, { "epoch": 0.19473566807941112, "grad_norm": 0.17895649373531342, "learning_rate": 1.983540679290922e-05, "loss": 0.5501, "step": 873 }, { "epoch": 0.19495873299130048, "grad_norm": 0.1671830266714096, "learning_rate": 1.9834981286865343e-05, "loss": 0.5207, "step": 874 }, { "epoch": 0.19518179790318982, "grad_norm": 0.18288585543632507, "learning_rate": 1.9834555236098344e-05, "loss": 0.5405, "step": 875 }, { "epoch": 0.19540486281507918, "grad_norm": 0.2788625657558441, "learning_rate": 1.983412864063181e-05, "loss": 0.5029, "step": 876 }, { "epoch": 0.19562792772696855, "grad_norm": 0.17516101896762848, "learning_rate": 1.983370150048938e-05, "loss": 0.5181, "step": 877 }, { "epoch": 0.1958509926388579, "grad_norm": 0.18724878132343292, "learning_rate": 1.9833273815694695e-05, "loss": 0.5399, "step": 878 }, { "epoch": 0.19607405755074728, "grad_norm": 0.1845855563879013, "learning_rate": 1.9832845586271456e-05, "loss": 0.5493, "step": 879 }, { "epoch": 0.19629712246263661, "grad_norm": 0.1939295083284378, "learning_rate": 1.9832416812243377e-05, "loss": 0.5453, "step": 880 }, { "epoch": 0.19652018737452598, "grad_norm": 0.17018291354179382, "learning_rate": 1.9831987493634207e-05, "loss": 0.5096, "step": 881 }, { "epoch": 0.19674325228641534, "grad_norm": 0.18090112507343292, "learning_rate": 1.9831557630467725e-05, "loss": 0.5519, "step": 882 }, { "epoch": 0.1969663171983047, "grad_norm": 0.19264590740203857, "learning_rate": 1.983112722276774e-05, "loss": 0.5469, "step": 883 }, { "epoch": 0.19718938211019407, "grad_norm": 0.185777947306633, "learning_rate": 1.9830696270558084e-05, "loss": 0.5484, "step": 884 }, { "epoch": 0.19741244702208344, "grad_norm": 0.15953023731708527, "learning_rate": 1.9830264773862633e-05, "loss": 0.5139, "step": 885 }, { "epoch": 0.19763551193397277, "grad_norm": 0.17809630930423737, "learning_rate": 1.9829832732705284e-05, "loss": 0.5178, "step": 886 }, { "epoch": 0.19785857684586214, "grad_norm": 0.20061112940311432, "learning_rate": 1.982940014710997e-05, "loss": 0.5393, "step": 887 }, { "epoch": 0.1980816417577515, "grad_norm": 0.1748671978712082, "learning_rate": 1.9828967017100642e-05, "loss": 0.5332, "step": 888 }, { "epoch": 0.19830470666964087, "grad_norm": 0.1713806539773941, "learning_rate": 1.9828533342701296e-05, "loss": 0.5165, "step": 889 }, { "epoch": 0.19852777158153023, "grad_norm": 0.16470858454704285, "learning_rate": 1.9828099123935948e-05, "loss": 0.5133, "step": 890 }, { "epoch": 0.1987508364934196, "grad_norm": 0.17374449968338013, "learning_rate": 1.9827664360828647e-05, "loss": 0.5475, "step": 891 }, { "epoch": 0.19897390140530893, "grad_norm": 0.1877843141555786, "learning_rate": 1.982722905340348e-05, "loss": 0.5253, "step": 892 }, { "epoch": 0.1991969663171983, "grad_norm": 0.17328353226184845, "learning_rate": 1.982679320168455e-05, "loss": 0.5219, "step": 893 }, { "epoch": 0.19942003122908766, "grad_norm": 0.17716079950332642, "learning_rate": 1.9826356805696e-05, "loss": 0.535, "step": 894 }, { "epoch": 0.19964309614097703, "grad_norm": 0.17511911690235138, "learning_rate": 1.9825919865462004e-05, "loss": 0.548, "step": 895 }, { "epoch": 0.1998661610528664, "grad_norm": 0.1711902767419815, "learning_rate": 1.9825482381006752e-05, "loss": 0.5402, "step": 896 }, { "epoch": 0.20008922596475576, "grad_norm": 0.17732638120651245, "learning_rate": 1.9825044352354482e-05, "loss": 0.5672, "step": 897 }, { "epoch": 0.2003122908766451, "grad_norm": 0.17266635596752167, "learning_rate": 1.9824605779529456e-05, "loss": 0.5312, "step": 898 }, { "epoch": 0.20053535578853446, "grad_norm": 0.16797249019145966, "learning_rate": 1.982416666255596e-05, "loss": 0.5294, "step": 899 }, { "epoch": 0.20075842070042382, "grad_norm": 0.17062917351722717, "learning_rate": 1.9823727001458318e-05, "loss": 0.51, "step": 900 }, { "epoch": 0.2009814856123132, "grad_norm": 0.17725898325443268, "learning_rate": 1.9823286796260887e-05, "loss": 0.5284, "step": 901 }, { "epoch": 0.20120455052420255, "grad_norm": 0.17662313580513, "learning_rate": 1.9822846046988037e-05, "loss": 0.515, "step": 902 }, { "epoch": 0.2014276154360919, "grad_norm": 0.17648808658123016, "learning_rate": 1.9822404753664183e-05, "loss": 0.5437, "step": 903 }, { "epoch": 0.20165068034798125, "grad_norm": 0.17179900407791138, "learning_rate": 1.982196291631377e-05, "loss": 0.5332, "step": 904 }, { "epoch": 0.20187374525987062, "grad_norm": 0.19034990668296814, "learning_rate": 1.982152053496127e-05, "loss": 0.5279, "step": 905 }, { "epoch": 0.20209681017175998, "grad_norm": 0.17066267132759094, "learning_rate": 1.9821077609631184e-05, "loss": 0.5473, "step": 906 }, { "epoch": 0.20231987508364935, "grad_norm": 0.1806066334247589, "learning_rate": 1.982063414034804e-05, "loss": 0.5495, "step": 907 }, { "epoch": 0.2025429399955387, "grad_norm": 0.17627598345279694, "learning_rate": 1.9820190127136403e-05, "loss": 0.5469, "step": 908 }, { "epoch": 0.20276600490742805, "grad_norm": 0.18282422423362732, "learning_rate": 1.9819745570020867e-05, "loss": 0.5228, "step": 909 }, { "epoch": 0.2029890698193174, "grad_norm": 0.17409084737300873, "learning_rate": 1.981930046902605e-05, "loss": 0.5183, "step": 910 }, { "epoch": 0.20321213473120678, "grad_norm": 0.1846904307603836, "learning_rate": 1.9818854824176612e-05, "loss": 0.5198, "step": 911 }, { "epoch": 0.20343519964309614, "grad_norm": 0.18149109184741974, "learning_rate": 1.9818408635497224e-05, "loss": 0.5078, "step": 912 }, { "epoch": 0.2036582645549855, "grad_norm": 0.18114425241947174, "learning_rate": 1.981796190301261e-05, "loss": 0.531, "step": 913 }, { "epoch": 0.20388132946687487, "grad_norm": 0.1718929558992386, "learning_rate": 1.981751462674751e-05, "loss": 0.4939, "step": 914 }, { "epoch": 0.2041043943787642, "grad_norm": 0.1930830329656601, "learning_rate": 1.9817066806726695e-05, "loss": 0.5055, "step": 915 }, { "epoch": 0.20432745929065357, "grad_norm": 0.17051468789577484, "learning_rate": 1.9816618442974964e-05, "loss": 0.5058, "step": 916 }, { "epoch": 0.20455052420254294, "grad_norm": 0.1774812787771225, "learning_rate": 1.9816169535517157e-05, "loss": 0.5341, "step": 917 }, { "epoch": 0.2047735891144323, "grad_norm": 0.17879648506641388, "learning_rate": 1.9815720084378134e-05, "loss": 0.534, "step": 918 }, { "epoch": 0.20499665402632167, "grad_norm": 0.1766035556793213, "learning_rate": 1.9815270089582795e-05, "loss": 0.5407, "step": 919 }, { "epoch": 0.20521971893821103, "grad_norm": 0.17892096936702728, "learning_rate": 1.981481955115605e-05, "loss": 0.4676, "step": 920 }, { "epoch": 0.20544278385010037, "grad_norm": 0.17289894819259644, "learning_rate": 1.9814368469122866e-05, "loss": 0.5416, "step": 921 }, { "epoch": 0.20566584876198973, "grad_norm": 0.1802949607372284, "learning_rate": 1.981391684350822e-05, "loss": 0.5425, "step": 922 }, { "epoch": 0.2058889136738791, "grad_norm": 0.17635071277618408, "learning_rate": 1.9813464674337126e-05, "loss": 0.5294, "step": 923 }, { "epoch": 0.20611197858576846, "grad_norm": 0.1723651885986328, "learning_rate": 1.981301196163463e-05, "loss": 0.5045, "step": 924 }, { "epoch": 0.20633504349765783, "grad_norm": 0.18130990862846375, "learning_rate": 1.9812558705425805e-05, "loss": 0.5264, "step": 925 }, { "epoch": 0.2065581084095472, "grad_norm": 0.1879456490278244, "learning_rate": 1.9812104905735756e-05, "loss": 0.5215, "step": 926 }, { "epoch": 0.20678117332143653, "grad_norm": 0.1742536425590515, "learning_rate": 1.9811650562589616e-05, "loss": 0.5093, "step": 927 }, { "epoch": 0.2070042382333259, "grad_norm": 0.18259446322917938, "learning_rate": 1.981119567601255e-05, "loss": 0.5528, "step": 928 }, { "epoch": 0.20722730314521526, "grad_norm": 0.19506299495697021, "learning_rate": 1.9810740246029755e-05, "loss": 0.5338, "step": 929 }, { "epoch": 0.20745036805710462, "grad_norm": 0.166092187166214, "learning_rate": 1.981028427266645e-05, "loss": 0.5308, "step": 930 }, { "epoch": 0.207673432968994, "grad_norm": 0.18397627770900726, "learning_rate": 1.980982775594789e-05, "loss": 0.5593, "step": 931 }, { "epoch": 0.20789649788088332, "grad_norm": 0.18404512107372284, "learning_rate": 1.980937069589937e-05, "loss": 0.5004, "step": 932 }, { "epoch": 0.2081195627927727, "grad_norm": 0.1727455109357834, "learning_rate": 1.9808913092546195e-05, "loss": 0.5245, "step": 933 }, { "epoch": 0.20834262770466205, "grad_norm": 0.18410643935203552, "learning_rate": 1.980845494591371e-05, "loss": 0.532, "step": 934 }, { "epoch": 0.20856569261655142, "grad_norm": 0.19003835320472717, "learning_rate": 1.9807996256027296e-05, "loss": 0.5129, "step": 935 }, { "epoch": 0.20878875752844078, "grad_norm": 0.23560728132724762, "learning_rate": 1.980753702291235e-05, "loss": 0.5484, "step": 936 }, { "epoch": 0.20901182244033015, "grad_norm": 0.17955288290977478, "learning_rate": 1.9807077246594316e-05, "loss": 0.5198, "step": 937 }, { "epoch": 0.20923488735221948, "grad_norm": 0.19580209255218506, "learning_rate": 1.9806616927098653e-05, "loss": 0.5023, "step": 938 }, { "epoch": 0.20945795226410885, "grad_norm": 0.17852741479873657, "learning_rate": 1.9806156064450855e-05, "loss": 0.5101, "step": 939 }, { "epoch": 0.2096810171759982, "grad_norm": 0.17126716673374176, "learning_rate": 1.9805694658676458e-05, "loss": 0.5415, "step": 940 }, { "epoch": 0.20990408208788758, "grad_norm": 0.16866669058799744, "learning_rate": 1.9805232709801008e-05, "loss": 0.5066, "step": 941 }, { "epoch": 0.21012714699977694, "grad_norm": 0.17179332673549652, "learning_rate": 1.9804770217850093e-05, "loss": 0.5059, "step": 942 }, { "epoch": 0.2103502119116663, "grad_norm": 0.1836715042591095, "learning_rate": 1.9804307182849326e-05, "loss": 0.5291, "step": 943 }, { "epoch": 0.21057327682355564, "grad_norm": 0.2429589480161667, "learning_rate": 1.980384360482436e-05, "loss": 0.5282, "step": 944 }, { "epoch": 0.210796341735445, "grad_norm": 0.17777171730995178, "learning_rate": 1.9803379483800866e-05, "loss": 0.5338, "step": 945 }, { "epoch": 0.21101940664733437, "grad_norm": 0.20419363677501678, "learning_rate": 1.9802914819804546e-05, "loss": 0.5345, "step": 946 }, { "epoch": 0.21124247155922374, "grad_norm": 0.23102112114429474, "learning_rate": 1.9802449612861144e-05, "loss": 0.5272, "step": 947 }, { "epoch": 0.2114655364711131, "grad_norm": 0.17385543882846832, "learning_rate": 1.9801983862996423e-05, "loss": 0.5206, "step": 948 }, { "epoch": 0.21168860138300247, "grad_norm": 0.20227134227752686, "learning_rate": 1.980151757023618e-05, "loss": 0.5362, "step": 949 }, { "epoch": 0.2119116662948918, "grad_norm": 0.3727608025074005, "learning_rate": 1.9801050734606236e-05, "loss": 0.511, "step": 950 }, { "epoch": 0.21213473120678117, "grad_norm": 0.17483538389205933, "learning_rate": 1.9800583356132453e-05, "loss": 0.5251, "step": 951 }, { "epoch": 0.21235779611867053, "grad_norm": 0.17400579154491425, "learning_rate": 1.9800115434840716e-05, "loss": 0.541, "step": 952 }, { "epoch": 0.2125808610305599, "grad_norm": 0.16707202792167664, "learning_rate": 1.979964697075694e-05, "loss": 0.5391, "step": 953 }, { "epoch": 0.21280392594244926, "grad_norm": 0.16636815667152405, "learning_rate": 1.9799177963907074e-05, "loss": 0.5434, "step": 954 }, { "epoch": 0.2130269908543386, "grad_norm": 0.17174044251441956, "learning_rate": 1.9798708414317095e-05, "loss": 0.5389, "step": 955 }, { "epoch": 0.21325005576622796, "grad_norm": 0.16985206305980682, "learning_rate": 1.9798238322013002e-05, "loss": 0.5327, "step": 956 }, { "epoch": 0.21347312067811733, "grad_norm": 0.17486560344696045, "learning_rate": 1.9797767687020843e-05, "loss": 0.5428, "step": 957 }, { "epoch": 0.2136961855900067, "grad_norm": 0.17041227221488953, "learning_rate": 1.9797296509366678e-05, "loss": 0.4995, "step": 958 }, { "epoch": 0.21391925050189606, "grad_norm": 0.1798069328069687, "learning_rate": 1.97968247890766e-05, "loss": 0.5364, "step": 959 }, { "epoch": 0.21414231541378542, "grad_norm": 0.17648300528526306, "learning_rate": 1.9796352526176746e-05, "loss": 0.5317, "step": 960 }, { "epoch": 0.21436538032567476, "grad_norm": 0.16647587716579437, "learning_rate": 1.9795879720693264e-05, "loss": 0.4989, "step": 961 }, { "epoch": 0.21458844523756412, "grad_norm": 0.17619404196739197, "learning_rate": 1.9795406372652345e-05, "loss": 0.5123, "step": 962 }, { "epoch": 0.2148115101494535, "grad_norm": 0.1789608895778656, "learning_rate": 1.979493248208021e-05, "loss": 0.5421, "step": 963 }, { "epoch": 0.21503457506134285, "grad_norm": 0.16893987357616425, "learning_rate": 1.97944580490031e-05, "loss": 0.5226, "step": 964 }, { "epoch": 0.21525763997323222, "grad_norm": 0.1653861552476883, "learning_rate": 1.9793983073447288e-05, "loss": 0.5221, "step": 965 }, { "epoch": 0.21548070488512158, "grad_norm": 0.17093954980373383, "learning_rate": 1.9793507555439092e-05, "loss": 0.535, "step": 966 }, { "epoch": 0.21570376979701092, "grad_norm": 0.18254542350769043, "learning_rate": 1.9793031495004845e-05, "loss": 0.5585, "step": 967 }, { "epoch": 0.21592683470890028, "grad_norm": 0.19674773514270782, "learning_rate": 1.9792554892170908e-05, "loss": 0.5159, "step": 968 }, { "epoch": 0.21614989962078965, "grad_norm": 0.17958855628967285, "learning_rate": 1.9792077746963686e-05, "loss": 0.5185, "step": 969 }, { "epoch": 0.216372964532679, "grad_norm": 0.1741204559803009, "learning_rate": 1.9791600059409606e-05, "loss": 0.5325, "step": 970 }, { "epoch": 0.21659602944456838, "grad_norm": 0.1755409985780716, "learning_rate": 1.9791121829535122e-05, "loss": 0.5005, "step": 971 }, { "epoch": 0.21681909435645774, "grad_norm": 0.17128707468509674, "learning_rate": 1.979064305736672e-05, "loss": 0.5345, "step": 972 }, { "epoch": 0.21704215926834708, "grad_norm": 0.17565475404262543, "learning_rate": 1.9790163742930922e-05, "loss": 0.4964, "step": 973 }, { "epoch": 0.21726522418023644, "grad_norm": 0.1766713559627533, "learning_rate": 1.978968388625427e-05, "loss": 0.5151, "step": 974 }, { "epoch": 0.2174882890921258, "grad_norm": 0.1699419617652893, "learning_rate": 1.9789203487363352e-05, "loss": 0.5365, "step": 975 }, { "epoch": 0.21771135400401517, "grad_norm": 0.20117329061031342, "learning_rate": 1.978872254628476e-05, "loss": 0.4942, "step": 976 }, { "epoch": 0.21793441891590454, "grad_norm": 0.17031805217266083, "learning_rate": 1.9788241063045147e-05, "loss": 0.5262, "step": 977 }, { "epoch": 0.21815748382779387, "grad_norm": 0.17150211334228516, "learning_rate": 1.9787759037671172e-05, "loss": 0.5169, "step": 978 }, { "epoch": 0.21838054873968324, "grad_norm": 0.17386960983276367, "learning_rate": 1.978727647018953e-05, "loss": 0.5464, "step": 979 }, { "epoch": 0.2186036136515726, "grad_norm": 0.17337769269943237, "learning_rate": 1.9786793360626956e-05, "loss": 0.5217, "step": 980 }, { "epoch": 0.21882667856346197, "grad_norm": 0.17415766417980194, "learning_rate": 1.9786309709010204e-05, "loss": 0.5222, "step": 981 }, { "epoch": 0.21904974347535133, "grad_norm": 0.17464500665664673, "learning_rate": 1.978582551536606e-05, "loss": 0.504, "step": 982 }, { "epoch": 0.2192728083872407, "grad_norm": 0.1703118085861206, "learning_rate": 1.9785340779721348e-05, "loss": 0.5419, "step": 983 }, { "epoch": 0.21949587329913003, "grad_norm": 0.1777229905128479, "learning_rate": 1.9784855502102908e-05, "loss": 0.5396, "step": 984 }, { "epoch": 0.2197189382110194, "grad_norm": 0.1821225881576538, "learning_rate": 1.978436968253762e-05, "loss": 0.5214, "step": 985 }, { "epoch": 0.21994200312290876, "grad_norm": 0.17865097522735596, "learning_rate": 1.9783883321052394e-05, "loss": 0.5354, "step": 986 }, { "epoch": 0.22016506803479813, "grad_norm": 0.17980031669139862, "learning_rate": 1.978339641767417e-05, "loss": 0.5461, "step": 987 }, { "epoch": 0.2203881329466875, "grad_norm": 0.18906496465206146, "learning_rate": 1.9782908972429906e-05, "loss": 0.5466, "step": 988 }, { "epoch": 0.22061119785857686, "grad_norm": 0.20680966973304749, "learning_rate": 1.978242098534661e-05, "loss": 0.4944, "step": 989 }, { "epoch": 0.2208342627704662, "grad_norm": 0.18064817786216736, "learning_rate": 1.978193245645131e-05, "loss": 0.5418, "step": 990 }, { "epoch": 0.22105732768235556, "grad_norm": 0.18148301541805267, "learning_rate": 1.978144338577105e-05, "loss": 0.5348, "step": 991 }, { "epoch": 0.22128039259424492, "grad_norm": 0.18182723224163055, "learning_rate": 1.9780953773332933e-05, "loss": 0.5159, "step": 992 }, { "epoch": 0.2215034575061343, "grad_norm": 0.17886720597743988, "learning_rate": 1.9780463619164073e-05, "loss": 0.5211, "step": 993 }, { "epoch": 0.22172652241802365, "grad_norm": 0.1731138974428177, "learning_rate": 1.9779972923291615e-05, "loss": 0.5386, "step": 994 }, { "epoch": 0.22194958732991302, "grad_norm": 0.17276941239833832, "learning_rate": 1.977948168574274e-05, "loss": 0.553, "step": 995 }, { "epoch": 0.22217265224180235, "grad_norm": 0.1833122968673706, "learning_rate": 1.977898990654465e-05, "loss": 0.5082, "step": 996 }, { "epoch": 0.22239571715369172, "grad_norm": 0.17474150657653809, "learning_rate": 1.9778497585724586e-05, "loss": 0.5167, "step": 997 }, { "epoch": 0.22261878206558108, "grad_norm": 0.18032675981521606, "learning_rate": 1.977800472330982e-05, "loss": 0.5351, "step": 998 }, { "epoch": 0.22284184697747045, "grad_norm": 0.19136174023151398, "learning_rate": 1.9777511319327645e-05, "loss": 0.5387, "step": 999 }, { "epoch": 0.2230649118893598, "grad_norm": 0.17949000000953674, "learning_rate": 1.977701737380539e-05, "loss": 0.5132, "step": 1000 }, { "epoch": 0.22328797680124918, "grad_norm": 0.16305360198020935, "learning_rate": 1.9776522886770413e-05, "loss": 0.4798, "step": 1001 }, { "epoch": 0.22351104171313851, "grad_norm": 0.18127582967281342, "learning_rate": 1.9776027858250102e-05, "loss": 0.5314, "step": 1002 }, { "epoch": 0.22373410662502788, "grad_norm": 0.17198516428470612, "learning_rate": 1.9775532288271876e-05, "loss": 0.5356, "step": 1003 }, { "epoch": 0.22395717153691724, "grad_norm": 0.17598840594291687, "learning_rate": 1.9775036176863178e-05, "loss": 0.5243, "step": 1004 }, { "epoch": 0.2241802364488066, "grad_norm": 0.17470763623714447, "learning_rate": 1.977453952405149e-05, "loss": 0.5138, "step": 1005 }, { "epoch": 0.22440330136069597, "grad_norm": 0.17710572481155396, "learning_rate": 1.977404232986432e-05, "loss": 0.5103, "step": 1006 }, { "epoch": 0.2246263662725853, "grad_norm": 0.16758602857589722, "learning_rate": 1.9773544594329202e-05, "loss": 0.5685, "step": 1007 }, { "epoch": 0.22484943118447467, "grad_norm": 0.17261147499084473, "learning_rate": 1.977304631747371e-05, "loss": 0.5149, "step": 1008 }, { "epoch": 0.22507249609636404, "grad_norm": 0.1722407191991806, "learning_rate": 1.9772547499325437e-05, "loss": 0.5277, "step": 1009 }, { "epoch": 0.2252955610082534, "grad_norm": 0.17808939516544342, "learning_rate": 1.9772048139912012e-05, "loss": 0.5392, "step": 1010 }, { "epoch": 0.22551862592014277, "grad_norm": 0.17333589494228363, "learning_rate": 1.9771548239261088e-05, "loss": 0.5347, "step": 1011 }, { "epoch": 0.22574169083203213, "grad_norm": 0.1775379180908203, "learning_rate": 1.9771047797400363e-05, "loss": 0.5243, "step": 1012 }, { "epoch": 0.22596475574392147, "grad_norm": 0.19107598066329956, "learning_rate": 1.9770546814357546e-05, "loss": 0.5464, "step": 1013 }, { "epoch": 0.22618782065581083, "grad_norm": 0.1707477569580078, "learning_rate": 1.9770045290160388e-05, "loss": 0.5337, "step": 1014 }, { "epoch": 0.2264108855677002, "grad_norm": 0.19580954313278198, "learning_rate": 1.9769543224836668e-05, "loss": 0.5138, "step": 1015 }, { "epoch": 0.22663395047958956, "grad_norm": 0.17244374752044678, "learning_rate": 1.9769040618414187e-05, "loss": 0.5232, "step": 1016 }, { "epoch": 0.22685701539147893, "grad_norm": 0.16292127966880798, "learning_rate": 1.9768537470920788e-05, "loss": 0.4989, "step": 1017 }, { "epoch": 0.2270800803033683, "grad_norm": 0.18515698611736298, "learning_rate": 1.9768033782384338e-05, "loss": 0.5316, "step": 1018 }, { "epoch": 0.22730314521525763, "grad_norm": 0.16262215375900269, "learning_rate": 1.9767529552832732e-05, "loss": 0.4855, "step": 1019 }, { "epoch": 0.227526210127147, "grad_norm": 0.17309491336345673, "learning_rate": 1.9767024782293902e-05, "loss": 0.5041, "step": 1020 }, { "epoch": 0.22774927503903636, "grad_norm": 0.1719563603401184, "learning_rate": 1.9766519470795803e-05, "loss": 0.5412, "step": 1021 }, { "epoch": 0.22797233995092572, "grad_norm": 0.2674558460712433, "learning_rate": 1.9766013618366417e-05, "loss": 0.5274, "step": 1022 }, { "epoch": 0.2281954048628151, "grad_norm": 0.16945339739322662, "learning_rate": 1.9765507225033772e-05, "loss": 0.5034, "step": 1023 }, { "epoch": 0.22841846977470445, "grad_norm": 0.19735541939735413, "learning_rate": 1.9765000290825908e-05, "loss": 0.5059, "step": 1024 }, { "epoch": 0.2286415346865938, "grad_norm": 0.17197129130363464, "learning_rate": 1.97644928157709e-05, "loss": 0.5133, "step": 1025 }, { "epoch": 0.22886459959848315, "grad_norm": 0.18425902724266052, "learning_rate": 1.976398479989686e-05, "loss": 0.5062, "step": 1026 }, { "epoch": 0.22908766451037252, "grad_norm": 0.4941766858100891, "learning_rate": 1.9763476243231924e-05, "loss": 0.535, "step": 1027 }, { "epoch": 0.22931072942226188, "grad_norm": 0.16707998514175415, "learning_rate": 1.976296714580426e-05, "loss": 0.5177, "step": 1028 }, { "epoch": 0.22953379433415125, "grad_norm": 0.173675537109375, "learning_rate": 1.9762457507642066e-05, "loss": 0.5234, "step": 1029 }, { "epoch": 0.22975685924604058, "grad_norm": 0.18508677184581757, "learning_rate": 1.9761947328773565e-05, "loss": 0.5073, "step": 1030 }, { "epoch": 0.22997992415792995, "grad_norm": 0.2566058039665222, "learning_rate": 1.9761436609227016e-05, "loss": 0.5176, "step": 1031 }, { "epoch": 0.2302029890698193, "grad_norm": 0.1702575534582138, "learning_rate": 1.9760925349030704e-05, "loss": 0.5106, "step": 1032 }, { "epoch": 0.23042605398170868, "grad_norm": 0.18858478963375092, "learning_rate": 1.976041354821295e-05, "loss": 0.5052, "step": 1033 }, { "epoch": 0.23064911889359804, "grad_norm": 0.191171795129776, "learning_rate": 1.9759901206802098e-05, "loss": 0.5643, "step": 1034 }, { "epoch": 0.2308721838054874, "grad_norm": 0.17536719143390656, "learning_rate": 1.9759388324826523e-05, "loss": 0.5344, "step": 1035 }, { "epoch": 0.23109524871737674, "grad_norm": 0.21609671413898468, "learning_rate": 1.9758874902314634e-05, "loss": 0.5197, "step": 1036 }, { "epoch": 0.2313183136292661, "grad_norm": 0.16850803792476654, "learning_rate": 1.9758360939294867e-05, "loss": 0.4967, "step": 1037 }, { "epoch": 0.23154137854115547, "grad_norm": 0.19448642432689667, "learning_rate": 1.9757846435795688e-05, "loss": 0.5171, "step": 1038 }, { "epoch": 0.23176444345304484, "grad_norm": 0.19304804503917694, "learning_rate": 1.9757331391845596e-05, "loss": 0.5491, "step": 1039 }, { "epoch": 0.2319875083649342, "grad_norm": 0.17683899402618408, "learning_rate": 1.975681580747312e-05, "loss": 0.535, "step": 1040 }, { "epoch": 0.23221057327682357, "grad_norm": 0.17681746184825897, "learning_rate": 1.9756299682706804e-05, "loss": 0.5053, "step": 1041 }, { "epoch": 0.2324336381887129, "grad_norm": 0.17382597923278809, "learning_rate": 1.9755783017575244e-05, "loss": 0.5063, "step": 1042 }, { "epoch": 0.23265670310060227, "grad_norm": 0.17841431498527527, "learning_rate": 1.9755265812107053e-05, "loss": 0.4845, "step": 1043 }, { "epoch": 0.23287976801249163, "grad_norm": 0.18297810852527618, "learning_rate": 1.9754748066330883e-05, "loss": 0.5418, "step": 1044 }, { "epoch": 0.233102832924381, "grad_norm": 0.19104409217834473, "learning_rate": 1.97542297802754e-05, "loss": 0.5417, "step": 1045 }, { "epoch": 0.23332589783627036, "grad_norm": 0.1772020310163498, "learning_rate": 1.975371095396932e-05, "loss": 0.5296, "step": 1046 }, { "epoch": 0.23354896274815973, "grad_norm": 0.18861500918865204, "learning_rate": 1.9753191587441372e-05, "loss": 0.5667, "step": 1047 }, { "epoch": 0.23377202766004906, "grad_norm": 0.17586645483970642, "learning_rate": 1.9752671680720324e-05, "loss": 0.5001, "step": 1048 }, { "epoch": 0.23399509257193843, "grad_norm": 0.7259854674339294, "learning_rate": 1.975215123383497e-05, "loss": 0.5456, "step": 1049 }, { "epoch": 0.2342181574838278, "grad_norm": 0.18736739456653595, "learning_rate": 1.9751630246814136e-05, "loss": 0.5231, "step": 1050 }, { "epoch": 0.23444122239571716, "grad_norm": 0.2538264989852905, "learning_rate": 1.9751108719686683e-05, "loss": 0.5387, "step": 1051 }, { "epoch": 0.23466428730760652, "grad_norm": 0.17714229226112366, "learning_rate": 1.9750586652481492e-05, "loss": 0.5076, "step": 1052 }, { "epoch": 0.23488735221949586, "grad_norm": 0.3667065501213074, "learning_rate": 1.9750064045227474e-05, "loss": 0.5432, "step": 1053 }, { "epoch": 0.23511041713138522, "grad_norm": 0.18610940873622894, "learning_rate": 1.9749540897953584e-05, "loss": 0.5307, "step": 1054 }, { "epoch": 0.2353334820432746, "grad_norm": 0.18711121380329132, "learning_rate": 1.974901721068879e-05, "loss": 0.5256, "step": 1055 }, { "epoch": 0.23555654695516395, "grad_norm": 0.17803776264190674, "learning_rate": 1.97484929834621e-05, "loss": 0.5076, "step": 1056 }, { "epoch": 0.23577961186705332, "grad_norm": 0.19948653876781464, "learning_rate": 1.9747968216302545e-05, "loss": 0.5185, "step": 1057 }, { "epoch": 0.23600267677894268, "grad_norm": 0.3181793987751007, "learning_rate": 1.9747442909239198e-05, "loss": 0.4874, "step": 1058 }, { "epoch": 0.23622574169083202, "grad_norm": 0.1865629106760025, "learning_rate": 1.9746917062301146e-05, "loss": 0.52, "step": 1059 }, { "epoch": 0.23644880660272138, "grad_norm": 0.23274224996566772, "learning_rate": 1.9746390675517514e-05, "loss": 0.496, "step": 1060 }, { "epoch": 0.23667187151461075, "grad_norm": 0.2044648379087448, "learning_rate": 1.974586374891746e-05, "loss": 0.5173, "step": 1061 }, { "epoch": 0.2368949364265001, "grad_norm": 0.18317201733589172, "learning_rate": 1.974533628253017e-05, "loss": 0.5282, "step": 1062 }, { "epoch": 0.23711800133838948, "grad_norm": 0.18429698050022125, "learning_rate": 1.9744808276384858e-05, "loss": 0.5395, "step": 1063 }, { "epoch": 0.23734106625027884, "grad_norm": 0.20926502346992493, "learning_rate": 1.9744279730510764e-05, "loss": 0.5111, "step": 1064 }, { "epoch": 0.23756413116216818, "grad_norm": 0.17823931574821472, "learning_rate": 1.974375064493716e-05, "loss": 0.5258, "step": 1065 }, { "epoch": 0.23778719607405754, "grad_norm": 0.19109368324279785, "learning_rate": 1.9743221019693362e-05, "loss": 0.5512, "step": 1066 }, { "epoch": 0.2380102609859469, "grad_norm": 0.21680758893489838, "learning_rate": 1.9742690854808692e-05, "loss": 0.4951, "step": 1067 }, { "epoch": 0.23823332589783627, "grad_norm": 0.1768484115600586, "learning_rate": 1.974216015031252e-05, "loss": 0.534, "step": 1068 }, { "epoch": 0.23845639080972564, "grad_norm": 0.18384598195552826, "learning_rate": 1.974162890623424e-05, "loss": 0.502, "step": 1069 }, { "epoch": 0.238679455721615, "grad_norm": 0.17585726082324982, "learning_rate": 1.974109712260327e-05, "loss": 0.5177, "step": 1070 }, { "epoch": 0.23890252063350434, "grad_norm": 0.17469032108783722, "learning_rate": 1.9740564799449073e-05, "loss": 0.553, "step": 1071 }, { "epoch": 0.2391255855453937, "grad_norm": 0.21859519183635712, "learning_rate": 1.9740031936801122e-05, "loss": 0.5204, "step": 1072 }, { "epoch": 0.23934865045728307, "grad_norm": 0.36654403805732727, "learning_rate": 1.9739498534688936e-05, "loss": 0.5375, "step": 1073 }, { "epoch": 0.23957171536917243, "grad_norm": 0.17866800725460052, "learning_rate": 1.973896459314206e-05, "loss": 0.5477, "step": 1074 }, { "epoch": 0.2397947802810618, "grad_norm": 0.1822461485862732, "learning_rate": 1.973843011219006e-05, "loss": 0.5197, "step": 1075 }, { "epoch": 0.24001784519295116, "grad_norm": 0.17180079221725464, "learning_rate": 1.9737895091862545e-05, "loss": 0.5269, "step": 1076 }, { "epoch": 0.2402409101048405, "grad_norm": 0.17935633659362793, "learning_rate": 1.9737359532189147e-05, "loss": 0.5279, "step": 1077 }, { "epoch": 0.24046397501672986, "grad_norm": 0.22954648733139038, "learning_rate": 1.9736823433199524e-05, "loss": 0.5199, "step": 1078 }, { "epoch": 0.24068703992861923, "grad_norm": 0.17607101798057556, "learning_rate": 1.973628679492338e-05, "loss": 0.5121, "step": 1079 }, { "epoch": 0.2409101048405086, "grad_norm": 0.17458245158195496, "learning_rate": 1.9735749617390422e-05, "loss": 0.5363, "step": 1080 }, { "epoch": 0.24113316975239796, "grad_norm": 0.17682591080665588, "learning_rate": 1.9735211900630414e-05, "loss": 0.5254, "step": 1081 }, { "epoch": 0.2413562346642873, "grad_norm": 0.17350362241268158, "learning_rate": 1.9734673644673133e-05, "loss": 0.5381, "step": 1082 }, { "epoch": 0.24157929957617666, "grad_norm": 0.17231930792331696, "learning_rate": 1.973413484954839e-05, "loss": 0.5118, "step": 1083 }, { "epoch": 0.24180236448806602, "grad_norm": 0.21497975289821625, "learning_rate": 1.9733595515286032e-05, "loss": 0.5353, "step": 1084 }, { "epoch": 0.2420254293999554, "grad_norm": 0.1669948548078537, "learning_rate": 1.9733055641915926e-05, "loss": 0.5216, "step": 1085 }, { "epoch": 0.24224849431184475, "grad_norm": 0.18229244649410248, "learning_rate": 1.9732515229467973e-05, "loss": 0.5379, "step": 1086 }, { "epoch": 0.24247155922373412, "grad_norm": 0.17432808876037598, "learning_rate": 1.973197427797211e-05, "loss": 0.5461, "step": 1087 }, { "epoch": 0.24269462413562345, "grad_norm": 0.18571536242961884, "learning_rate": 1.9731432787458294e-05, "loss": 0.5469, "step": 1088 }, { "epoch": 0.24291768904751282, "grad_norm": 0.18052545189857483, "learning_rate": 1.9730890757956517e-05, "loss": 0.5416, "step": 1089 }, { "epoch": 0.24314075395940218, "grad_norm": 0.23118536174297333, "learning_rate": 1.97303481894968e-05, "loss": 0.5063, "step": 1090 }, { "epoch": 0.24336381887129155, "grad_norm": 0.16191843152046204, "learning_rate": 1.9729805082109194e-05, "loss": 0.5178, "step": 1091 }, { "epoch": 0.2435868837831809, "grad_norm": 0.17042016983032227, "learning_rate": 1.9729261435823782e-05, "loss": 0.5024, "step": 1092 }, { "epoch": 0.24380994869507028, "grad_norm": 0.21003969013690948, "learning_rate": 1.972871725067067e-05, "loss": 0.5106, "step": 1093 }, { "epoch": 0.24403301360695961, "grad_norm": 0.22672854363918304, "learning_rate": 1.972817252668e-05, "loss": 0.5309, "step": 1094 }, { "epoch": 0.24425607851884898, "grad_norm": 0.16994351148605347, "learning_rate": 1.9727627263881942e-05, "loss": 0.5023, "step": 1095 }, { "epoch": 0.24447914343073834, "grad_norm": 0.17019368708133698, "learning_rate": 1.9727081462306697e-05, "loss": 0.5316, "step": 1096 }, { "epoch": 0.2447022083426277, "grad_norm": 0.17145198583602905, "learning_rate": 1.97265351219845e-05, "loss": 0.5148, "step": 1097 }, { "epoch": 0.24492527325451707, "grad_norm": 0.20565351843833923, "learning_rate": 1.9725988242945598e-05, "loss": 0.5445, "step": 1098 }, { "epoch": 0.24514833816640644, "grad_norm": 0.1741577833890915, "learning_rate": 1.9725440825220296e-05, "loss": 0.4958, "step": 1099 }, { "epoch": 0.24537140307829577, "grad_norm": 0.18705230951309204, "learning_rate": 1.9724892868838902e-05, "loss": 0.5105, "step": 1100 }, { "epoch": 0.24559446799018514, "grad_norm": 0.5166819095611572, "learning_rate": 1.9724344373831768e-05, "loss": 0.5414, "step": 1101 }, { "epoch": 0.2458175329020745, "grad_norm": 0.17351533472537994, "learning_rate": 1.9723795340229274e-05, "loss": 0.5024, "step": 1102 }, { "epoch": 0.24604059781396387, "grad_norm": 0.1658518761396408, "learning_rate": 1.972324576806183e-05, "loss": 0.4977, "step": 1103 }, { "epoch": 0.24626366272585323, "grad_norm": 0.16890370845794678, "learning_rate": 1.972269565735987e-05, "loss": 0.5359, "step": 1104 }, { "epoch": 0.24648672763774257, "grad_norm": 0.18016333878040314, "learning_rate": 1.9722145008153873e-05, "loss": 0.5394, "step": 1105 }, { "epoch": 0.24670979254963193, "grad_norm": 0.16652169823646545, "learning_rate": 1.9721593820474326e-05, "loss": 0.5089, "step": 1106 }, { "epoch": 0.2469328574615213, "grad_norm": 0.1999325454235077, "learning_rate": 1.9721042094351764e-05, "loss": 0.5541, "step": 1107 }, { "epoch": 0.24715592237341066, "grad_norm": 0.16372385621070862, "learning_rate": 1.972048982981674e-05, "loss": 0.4942, "step": 1108 }, { "epoch": 0.24737898728530003, "grad_norm": 0.20245228707790375, "learning_rate": 1.971993702689985e-05, "loss": 0.5034, "step": 1109 }, { "epoch": 0.2476020521971894, "grad_norm": 0.17487917840480804, "learning_rate": 1.97193836856317e-05, "loss": 0.5231, "step": 1110 }, { "epoch": 0.24782511710907873, "grad_norm": 0.1696334332227707, "learning_rate": 1.971882980604295e-05, "loss": 0.5141, "step": 1111 }, { "epoch": 0.2480481820209681, "grad_norm": 0.17021594941616058, "learning_rate": 1.971827538816427e-05, "loss": 0.5081, "step": 1112 }, { "epoch": 0.24827124693285746, "grad_norm": 0.18088696897029877, "learning_rate": 1.9717720432026367e-05, "loss": 0.5743, "step": 1113 }, { "epoch": 0.24849431184474682, "grad_norm": 0.17647258937358856, "learning_rate": 1.9717164937659984e-05, "loss": 0.5289, "step": 1114 }, { "epoch": 0.2487173767566362, "grad_norm": 0.17265263199806213, "learning_rate": 1.971660890509588e-05, "loss": 0.5296, "step": 1115 }, { "epoch": 0.24894044166852555, "grad_norm": 0.16816379129886627, "learning_rate": 1.971605233436485e-05, "loss": 0.5263, "step": 1116 }, { "epoch": 0.2491635065804149, "grad_norm": 0.17548470199108124, "learning_rate": 1.9715495225497736e-05, "loss": 0.5315, "step": 1117 }, { "epoch": 0.24938657149230425, "grad_norm": 0.1775428056716919, "learning_rate": 1.9714937578525374e-05, "loss": 0.5227, "step": 1118 }, { "epoch": 0.24960963640419362, "grad_norm": 0.17629674077033997, "learning_rate": 1.971437939347866e-05, "loss": 0.5475, "step": 1119 }, { "epoch": 0.24983270131608298, "grad_norm": 0.20265498757362366, "learning_rate": 1.9713820670388518e-05, "loss": 0.5415, "step": 1120 }, { "epoch": 0.2500557662279723, "grad_norm": 0.17958010733127594, "learning_rate": 1.9713261409285876e-05, "loss": 0.5491, "step": 1121 }, { "epoch": 0.2502788311398617, "grad_norm": 0.1667289137840271, "learning_rate": 1.9712701610201723e-05, "loss": 0.5319, "step": 1122 }, { "epoch": 0.25050189605175105, "grad_norm": 0.17537854611873627, "learning_rate": 1.9712141273167058e-05, "loss": 0.5033, "step": 1123 }, { "epoch": 0.25072496096364044, "grad_norm": 0.1665065735578537, "learning_rate": 1.9711580398212918e-05, "loss": 0.5043, "step": 1124 }, { "epoch": 0.2509480258755298, "grad_norm": 0.17893299460411072, "learning_rate": 1.9711018985370366e-05, "loss": 0.5424, "step": 1125 }, { "epoch": 0.2511710907874191, "grad_norm": 0.17293697595596313, "learning_rate": 1.97104570346705e-05, "loss": 0.546, "step": 1126 }, { "epoch": 0.2513941556993085, "grad_norm": 0.16615547239780426, "learning_rate": 1.970989454614444e-05, "loss": 0.5163, "step": 1127 }, { "epoch": 0.25161722061119784, "grad_norm": 0.16944670677185059, "learning_rate": 1.9709331519823343e-05, "loss": 0.5471, "step": 1128 }, { "epoch": 0.25184028552308724, "grad_norm": 0.179295152425766, "learning_rate": 1.9708767955738394e-05, "loss": 0.5381, "step": 1129 }, { "epoch": 0.2520633504349766, "grad_norm": 0.1665600687265396, "learning_rate": 1.9708203853920803e-05, "loss": 0.4863, "step": 1130 }, { "epoch": 0.2522864153468659, "grad_norm": 0.1918114423751831, "learning_rate": 1.970763921440182e-05, "loss": 0.4875, "step": 1131 }, { "epoch": 0.2525094802587553, "grad_norm": 0.16761860251426697, "learning_rate": 1.9707074037212707e-05, "loss": 0.5314, "step": 1132 }, { "epoch": 0.25273254517064464, "grad_norm": 0.19442524015903473, "learning_rate": 1.970650832238478e-05, "loss": 0.5186, "step": 1133 }, { "epoch": 0.25295561008253403, "grad_norm": 0.17174071073532104, "learning_rate": 1.9705942069949362e-05, "loss": 0.5327, "step": 1134 }, { "epoch": 0.25317867499442337, "grad_norm": 0.18218085169792175, "learning_rate": 1.970537527993782e-05, "loss": 0.5665, "step": 1135 }, { "epoch": 0.25340173990631276, "grad_norm": 0.17595386505126953, "learning_rate": 1.9704807952381542e-05, "loss": 0.5581, "step": 1136 }, { "epoch": 0.2536248048182021, "grad_norm": 0.20507752895355225, "learning_rate": 1.9704240087311963e-05, "loss": 0.541, "step": 1137 }, { "epoch": 0.25384786973009144, "grad_norm": 0.1604541391134262, "learning_rate": 1.970367168476052e-05, "loss": 0.5223, "step": 1138 }, { "epoch": 0.25407093464198083, "grad_norm": 0.16794823110103607, "learning_rate": 1.9703102744758703e-05, "loss": 0.5444, "step": 1139 }, { "epoch": 0.25429399955387016, "grad_norm": 0.16887861490249634, "learning_rate": 1.9702533267338015e-05, "loss": 0.5237, "step": 1140 }, { "epoch": 0.25451706446575956, "grad_norm": 0.16295503079891205, "learning_rate": 1.970196325253001e-05, "loss": 0.5293, "step": 1141 }, { "epoch": 0.2547401293776489, "grad_norm": 0.16665813326835632, "learning_rate": 1.9701392700366247e-05, "loss": 0.5104, "step": 1142 }, { "epoch": 0.25496319428953823, "grad_norm": 0.16648006439208984, "learning_rate": 1.970082161087834e-05, "loss": 0.5001, "step": 1143 }, { "epoch": 0.2551862592014276, "grad_norm": 0.16400828957557678, "learning_rate": 1.9700249984097907e-05, "loss": 0.5106, "step": 1144 }, { "epoch": 0.25540932411331696, "grad_norm": 0.16578614711761475, "learning_rate": 1.969967782005661e-05, "loss": 0.5134, "step": 1145 }, { "epoch": 0.25563238902520635, "grad_norm": 0.16847828030586243, "learning_rate": 1.9699105118786145e-05, "loss": 0.4994, "step": 1146 }, { "epoch": 0.2558554539370957, "grad_norm": 0.18443261086940765, "learning_rate": 1.9698531880318228e-05, "loss": 0.5136, "step": 1147 }, { "epoch": 0.2560785188489851, "grad_norm": 0.17291708290576935, "learning_rate": 1.969795810468461e-05, "loss": 0.5243, "step": 1148 }, { "epoch": 0.2563015837608744, "grad_norm": 0.18102054297924042, "learning_rate": 1.9697383791917068e-05, "loss": 0.4966, "step": 1149 }, { "epoch": 0.25652464867276376, "grad_norm": 0.17706115543842316, "learning_rate": 1.9696808942047414e-05, "loss": 0.5332, "step": 1150 }, { "epoch": 0.25674771358465315, "grad_norm": 0.1936006247997284, "learning_rate": 1.9696233555107484e-05, "loss": 0.5325, "step": 1151 }, { "epoch": 0.2569707784965425, "grad_norm": 0.19508236646652222, "learning_rate": 1.969565763112915e-05, "loss": 0.509, "step": 1152 }, { "epoch": 0.2571938434084319, "grad_norm": 0.16369056701660156, "learning_rate": 1.9695081170144306e-05, "loss": 0.5188, "step": 1153 }, { "epoch": 0.2574169083203212, "grad_norm": 0.16955684125423431, "learning_rate": 1.9694504172184885e-05, "loss": 0.5316, "step": 1154 }, { "epoch": 0.25763997323221055, "grad_norm": 0.16252164542675018, "learning_rate": 1.969392663728284e-05, "loss": 0.5291, "step": 1155 }, { "epoch": 0.25786303814409994, "grad_norm": 0.17399340867996216, "learning_rate": 1.969334856547016e-05, "loss": 0.5153, "step": 1156 }, { "epoch": 0.2580861030559893, "grad_norm": 0.17638690769672394, "learning_rate": 1.9692769956778867e-05, "loss": 0.5128, "step": 1157 }, { "epoch": 0.2583091679678787, "grad_norm": 0.1711435467004776, "learning_rate": 1.9692190811241e-05, "loss": 0.5205, "step": 1158 }, { "epoch": 0.258532232879768, "grad_norm": 0.18419131636619568, "learning_rate": 1.9691611128888643e-05, "loss": 0.5312, "step": 1159 }, { "epoch": 0.25875529779165735, "grad_norm": 0.1925041675567627, "learning_rate": 1.9691030909753894e-05, "loss": 0.5479, "step": 1160 }, { "epoch": 0.25897836270354674, "grad_norm": 0.1986900418996811, "learning_rate": 1.9690450153868895e-05, "loss": 0.5095, "step": 1161 }, { "epoch": 0.2592014276154361, "grad_norm": 0.17978021502494812, "learning_rate": 1.9689868861265816e-05, "loss": 0.5015, "step": 1162 }, { "epoch": 0.25942449252732547, "grad_norm": 0.16919931769371033, "learning_rate": 1.9689287031976845e-05, "loss": 0.5227, "step": 1163 }, { "epoch": 0.2596475574392148, "grad_norm": 0.17485311627388, "learning_rate": 1.9688704666034208e-05, "loss": 0.5386, "step": 1164 }, { "epoch": 0.2598706223511042, "grad_norm": 0.17683085799217224, "learning_rate": 1.9688121763470165e-05, "loss": 0.5201, "step": 1165 }, { "epoch": 0.26009368726299353, "grad_norm": 0.18902894854545593, "learning_rate": 1.9687538324316997e-05, "loss": 0.51, "step": 1166 }, { "epoch": 0.26031675217488287, "grad_norm": 0.17308862507343292, "learning_rate": 1.968695434860702e-05, "loss": 0.5112, "step": 1167 }, { "epoch": 0.26053981708677226, "grad_norm": 0.17456986010074615, "learning_rate": 1.9686369836372577e-05, "loss": 0.5182, "step": 1168 }, { "epoch": 0.2607628819986616, "grad_norm": 0.17276804149150848, "learning_rate": 1.9685784787646044e-05, "loss": 0.5389, "step": 1169 }, { "epoch": 0.260985946910551, "grad_norm": 0.1710672229528427, "learning_rate": 1.9685199202459824e-05, "loss": 0.4948, "step": 1170 }, { "epoch": 0.26120901182244033, "grad_norm": 0.18158847093582153, "learning_rate": 1.9684613080846347e-05, "loss": 0.5332, "step": 1171 }, { "epoch": 0.26143207673432967, "grad_norm": 0.17782646417617798, "learning_rate": 1.968402642283808e-05, "loss": 0.5519, "step": 1172 }, { "epoch": 0.26165514164621906, "grad_norm": 0.1716913878917694, "learning_rate": 1.9683439228467515e-05, "loss": 0.5074, "step": 1173 }, { "epoch": 0.2618782065581084, "grad_norm": 0.1818932741880417, "learning_rate": 1.9682851497767175e-05, "loss": 0.5401, "step": 1174 }, { "epoch": 0.2621012714699978, "grad_norm": 0.1722068190574646, "learning_rate": 1.9682263230769612e-05, "loss": 0.52, "step": 1175 }, { "epoch": 0.2623243363818871, "grad_norm": 0.17454420030117035, "learning_rate": 1.9681674427507405e-05, "loss": 0.5063, "step": 1176 }, { "epoch": 0.2625474012937765, "grad_norm": 0.1726730465888977, "learning_rate": 1.9681085088013174e-05, "loss": 0.5343, "step": 1177 }, { "epoch": 0.26277046620566585, "grad_norm": 0.1802458018064499, "learning_rate": 1.9680495212319547e-05, "loss": 0.5218, "step": 1178 }, { "epoch": 0.2629935311175552, "grad_norm": 0.1708204448223114, "learning_rate": 1.9679904800459205e-05, "loss": 0.4943, "step": 1179 }, { "epoch": 0.2632165960294446, "grad_norm": 0.1644791066646576, "learning_rate": 1.9679313852464846e-05, "loss": 0.534, "step": 1180 }, { "epoch": 0.2634396609413339, "grad_norm": 0.16891272366046906, "learning_rate": 1.9678722368369203e-05, "loss": 0.519, "step": 1181 }, { "epoch": 0.2636627258532233, "grad_norm": 0.1720377802848816, "learning_rate": 1.9678130348205032e-05, "loss": 0.5362, "step": 1182 }, { "epoch": 0.26388579076511265, "grad_norm": 0.17827892303466797, "learning_rate": 1.9677537792005124e-05, "loss": 0.5387, "step": 1183 }, { "epoch": 0.264108855677002, "grad_norm": 0.17692813277244568, "learning_rate": 1.96769446998023e-05, "loss": 0.5051, "step": 1184 }, { "epoch": 0.2643319205888914, "grad_norm": 0.17577511072158813, "learning_rate": 1.9676351071629405e-05, "loss": 0.5162, "step": 1185 }, { "epoch": 0.2645549855007807, "grad_norm": 0.1805581897497177, "learning_rate": 1.9675756907519325e-05, "loss": 0.5528, "step": 1186 }, { "epoch": 0.2647780504126701, "grad_norm": 0.17259716987609863, "learning_rate": 1.967516220750496e-05, "loss": 0.5381, "step": 1187 }, { "epoch": 0.26500111532455944, "grad_norm": 0.16992929577827454, "learning_rate": 1.9674566971619256e-05, "loss": 0.51, "step": 1188 }, { "epoch": 0.2652241802364488, "grad_norm": 0.16945737600326538, "learning_rate": 1.9673971199895177e-05, "loss": 0.4987, "step": 1189 }, { "epoch": 0.2654472451483382, "grad_norm": 0.16880926489830017, "learning_rate": 1.967337489236572e-05, "loss": 0.5188, "step": 1190 }, { "epoch": 0.2656703100602275, "grad_norm": 0.17628756165504456, "learning_rate": 1.9672778049063915e-05, "loss": 0.5346, "step": 1191 }, { "epoch": 0.2658933749721169, "grad_norm": 0.17406663298606873, "learning_rate": 1.967218067002282e-05, "loss": 0.5493, "step": 1192 }, { "epoch": 0.26611643988400624, "grad_norm": 0.17142999172210693, "learning_rate": 1.9671582755275515e-05, "loss": 0.5256, "step": 1193 }, { "epoch": 0.26633950479589563, "grad_norm": 0.17752200365066528, "learning_rate": 1.9670984304855125e-05, "loss": 0.5237, "step": 1194 }, { "epoch": 0.26656256970778497, "grad_norm": 0.17670097947120667, "learning_rate": 1.9670385318794785e-05, "loss": 0.5226, "step": 1195 }, { "epoch": 0.2667856346196743, "grad_norm": 0.1656324565410614, "learning_rate": 1.966978579712768e-05, "loss": 0.494, "step": 1196 }, { "epoch": 0.2670086995315637, "grad_norm": 0.17369796335697174, "learning_rate": 1.966918573988701e-05, "loss": 0.506, "step": 1197 }, { "epoch": 0.26723176444345303, "grad_norm": 0.17040349543094635, "learning_rate": 1.9668585147106017e-05, "loss": 0.5112, "step": 1198 }, { "epoch": 0.2674548293553424, "grad_norm": 0.1750401258468628, "learning_rate": 1.9667984018817957e-05, "loss": 0.5465, "step": 1199 }, { "epoch": 0.26767789426723176, "grad_norm": 0.16083772480487823, "learning_rate": 1.9667382355056128e-05, "loss": 0.5105, "step": 1200 }, { "epoch": 0.2679009591791211, "grad_norm": 0.16108831763267517, "learning_rate": 1.9666780155853854e-05, "loss": 0.512, "step": 1201 }, { "epoch": 0.2681240240910105, "grad_norm": 0.17286653816699982, "learning_rate": 1.966617742124449e-05, "loss": 0.5686, "step": 1202 }, { "epoch": 0.26834708900289983, "grad_norm": 0.17905832827091217, "learning_rate": 1.9665574151261418e-05, "loss": 0.5423, "step": 1203 }, { "epoch": 0.2685701539147892, "grad_norm": 0.16681291162967682, "learning_rate": 1.966497034593805e-05, "loss": 0.5041, "step": 1204 }, { "epoch": 0.26879321882667856, "grad_norm": 0.17184089124202728, "learning_rate": 1.9664366005307828e-05, "loss": 0.5501, "step": 1205 }, { "epoch": 0.2690162837385679, "grad_norm": 0.19392690062522888, "learning_rate": 1.9663761129404228e-05, "loss": 0.5398, "step": 1206 }, { "epoch": 0.2692393486504573, "grad_norm": 0.16529332101345062, "learning_rate": 1.9663155718260746e-05, "loss": 0.5086, "step": 1207 }, { "epoch": 0.2694624135623466, "grad_norm": 0.1649094521999359, "learning_rate": 1.966254977191092e-05, "loss": 0.515, "step": 1208 }, { "epoch": 0.269685478474236, "grad_norm": 0.1709623634815216, "learning_rate": 1.9661943290388302e-05, "loss": 0.5315, "step": 1209 }, { "epoch": 0.26990854338612535, "grad_norm": 0.164725661277771, "learning_rate": 1.9661336273726496e-05, "loss": 0.4817, "step": 1210 }, { "epoch": 0.27013160829801475, "grad_norm": 0.1715349704027176, "learning_rate": 1.966072872195911e-05, "loss": 0.4706, "step": 1211 }, { "epoch": 0.2703546732099041, "grad_norm": 0.1747862696647644, "learning_rate": 1.9660120635119798e-05, "loss": 0.5055, "step": 1212 }, { "epoch": 0.2705777381217934, "grad_norm": 0.17992718517780304, "learning_rate": 1.9659512013242245e-05, "loss": 0.4946, "step": 1213 }, { "epoch": 0.2708008030336828, "grad_norm": 0.1895056813955307, "learning_rate": 1.9658902856360153e-05, "loss": 0.5258, "step": 1214 }, { "epoch": 0.27102386794557215, "grad_norm": 0.19161482155323029, "learning_rate": 1.9658293164507265e-05, "loss": 0.5206, "step": 1215 }, { "epoch": 0.27124693285746154, "grad_norm": 0.1860935389995575, "learning_rate": 1.965768293771735e-05, "loss": 0.5484, "step": 1216 }, { "epoch": 0.2714699977693509, "grad_norm": 0.17763131856918335, "learning_rate": 1.9657072176024202e-05, "loss": 0.5029, "step": 1217 }, { "epoch": 0.2716930626812402, "grad_norm": 0.18652020394802094, "learning_rate": 1.9656460879461652e-05, "loss": 0.5161, "step": 1218 }, { "epoch": 0.2719161275931296, "grad_norm": 0.2644646465778351, "learning_rate": 1.965584904806356e-05, "loss": 0.5371, "step": 1219 }, { "epoch": 0.27213919250501895, "grad_norm": 0.1769624799489975, "learning_rate": 1.9655236681863806e-05, "loss": 0.4892, "step": 1220 }, { "epoch": 0.27236225741690834, "grad_norm": 0.1740611046552658, "learning_rate": 1.9654623780896313e-05, "loss": 0.5162, "step": 1221 }, { "epoch": 0.2725853223287977, "grad_norm": 0.16314013302326202, "learning_rate": 1.9654010345195026e-05, "loss": 0.5304, "step": 1222 }, { "epoch": 0.27280838724068707, "grad_norm": 0.18545354902744293, "learning_rate": 1.9653396374793915e-05, "loss": 0.5181, "step": 1223 }, { "epoch": 0.2730314521525764, "grad_norm": 0.17202220857143402, "learning_rate": 1.9652781869726993e-05, "loss": 0.5486, "step": 1224 }, { "epoch": 0.27325451706446574, "grad_norm": 0.18883365392684937, "learning_rate": 1.9652166830028295e-05, "loss": 0.5565, "step": 1225 }, { "epoch": 0.27347758197635513, "grad_norm": 0.17693057656288147, "learning_rate": 1.9651551255731884e-05, "loss": 0.5275, "step": 1226 }, { "epoch": 0.27370064688824447, "grad_norm": 0.17243415117263794, "learning_rate": 1.9650935146871848e-05, "loss": 0.5037, "step": 1227 }, { "epoch": 0.27392371180013386, "grad_norm": 0.18462933599948883, "learning_rate": 1.9650318503482323e-05, "loss": 0.5573, "step": 1228 }, { "epoch": 0.2741467767120232, "grad_norm": 0.3310684263706207, "learning_rate": 1.964970132559745e-05, "loss": 0.5186, "step": 1229 }, { "epoch": 0.27436984162391254, "grad_norm": 0.16663436591625214, "learning_rate": 1.964908361325142e-05, "loss": 0.5067, "step": 1230 }, { "epoch": 0.27459290653580193, "grad_norm": 0.16989050805568695, "learning_rate": 1.964846536647845e-05, "loss": 0.5084, "step": 1231 }, { "epoch": 0.27481597144769127, "grad_norm": 0.16925394535064697, "learning_rate": 1.9647846585312775e-05, "loss": 0.5348, "step": 1232 }, { "epoch": 0.27503903635958066, "grad_norm": 0.176070898771286, "learning_rate": 1.9647227269788665e-05, "loss": 0.5249, "step": 1233 }, { "epoch": 0.27526210127147, "grad_norm": 0.16656488180160522, "learning_rate": 1.9646607419940428e-05, "loss": 0.5408, "step": 1234 }, { "epoch": 0.27548516618335933, "grad_norm": 0.16989517211914062, "learning_rate": 1.964598703580239e-05, "loss": 0.5033, "step": 1235 }, { "epoch": 0.2757082310952487, "grad_norm": 0.18006569147109985, "learning_rate": 1.9645366117408918e-05, "loss": 0.5354, "step": 1236 }, { "epoch": 0.27593129600713806, "grad_norm": 0.17241814732551575, "learning_rate": 1.9644744664794394e-05, "loss": 0.508, "step": 1237 }, { "epoch": 0.27615436091902745, "grad_norm": 0.20181423425674438, "learning_rate": 1.9644122677993246e-05, "loss": 0.4536, "step": 1238 }, { "epoch": 0.2763774258309168, "grad_norm": 0.17486101388931274, "learning_rate": 1.964350015703992e-05, "loss": 0.5597, "step": 1239 }, { "epoch": 0.2766004907428062, "grad_norm": 0.1774255484342575, "learning_rate": 1.9642877101968894e-05, "loss": 0.5511, "step": 1240 }, { "epoch": 0.2768235556546955, "grad_norm": 0.3290056586265564, "learning_rate": 1.964225351281468e-05, "loss": 0.5278, "step": 1241 }, { "epoch": 0.27704662056658486, "grad_norm": 0.20791974663734436, "learning_rate": 1.9641629389611813e-05, "loss": 0.5261, "step": 1242 }, { "epoch": 0.27726968547847425, "grad_norm": 0.17645849287509918, "learning_rate": 1.9641004732394862e-05, "loss": 0.526, "step": 1243 }, { "epoch": 0.2774927503903636, "grad_norm": 0.16956675052642822, "learning_rate": 1.9640379541198425e-05, "loss": 0.5489, "step": 1244 }, { "epoch": 0.277715815302253, "grad_norm": 0.18473365902900696, "learning_rate": 1.9639753816057128e-05, "loss": 0.5422, "step": 1245 }, { "epoch": 0.2779388802141423, "grad_norm": 0.262015700340271, "learning_rate": 1.9639127557005627e-05, "loss": 0.5031, "step": 1246 }, { "epoch": 0.27816194512603165, "grad_norm": 0.19820185005664825, "learning_rate": 1.963850076407861e-05, "loss": 0.5132, "step": 1247 }, { "epoch": 0.27838501003792104, "grad_norm": 0.1682923436164856, "learning_rate": 1.9637873437310795e-05, "loss": 0.5214, "step": 1248 }, { "epoch": 0.2786080749498104, "grad_norm": 0.17070676386356354, "learning_rate": 1.9637245576736923e-05, "loss": 0.5368, "step": 1249 }, { "epoch": 0.2788311398616998, "grad_norm": 0.17165376245975494, "learning_rate": 1.9636617182391768e-05, "loss": 0.5282, "step": 1250 }, { "epoch": 0.2790542047735891, "grad_norm": 0.17272816598415375, "learning_rate": 1.963598825431014e-05, "loss": 0.5657, "step": 1251 }, { "epoch": 0.2792772696854785, "grad_norm": 0.1659235805273056, "learning_rate": 1.9635358792526865e-05, "loss": 0.5181, "step": 1252 }, { "epoch": 0.27950033459736784, "grad_norm": 0.1700238287448883, "learning_rate": 1.9634728797076818e-05, "loss": 0.5194, "step": 1253 }, { "epoch": 0.2797233995092572, "grad_norm": 0.1710444688796997, "learning_rate": 1.9634098267994882e-05, "loss": 0.5405, "step": 1254 }, { "epoch": 0.27994646442114657, "grad_norm": 0.1748325079679489, "learning_rate": 1.9633467205315983e-05, "loss": 0.5295, "step": 1255 }, { "epoch": 0.2801695293330359, "grad_norm": 0.1600925624370575, "learning_rate": 1.9632835609075072e-05, "loss": 0.5448, "step": 1256 }, { "epoch": 0.2803925942449253, "grad_norm": 0.1937401294708252, "learning_rate": 1.9632203479307132e-05, "loss": 0.5145, "step": 1257 }, { "epoch": 0.28061565915681463, "grad_norm": 0.1888452023267746, "learning_rate": 1.9631570816047176e-05, "loss": 0.5376, "step": 1258 }, { "epoch": 0.28083872406870397, "grad_norm": 0.16786231100559235, "learning_rate": 1.963093761933024e-05, "loss": 0.5336, "step": 1259 }, { "epoch": 0.28106178898059336, "grad_norm": 0.16962790489196777, "learning_rate": 1.9630303889191406e-05, "loss": 0.5161, "step": 1260 }, { "epoch": 0.2812848538924827, "grad_norm": 0.17393000423908234, "learning_rate": 1.9629669625665757e-05, "loss": 0.5098, "step": 1261 }, { "epoch": 0.2815079188043721, "grad_norm": 0.18124370276927948, "learning_rate": 1.9629034828788435e-05, "loss": 0.5155, "step": 1262 }, { "epoch": 0.28173098371626143, "grad_norm": 0.16687680780887604, "learning_rate": 1.962839949859459e-05, "loss": 0.5189, "step": 1263 }, { "epoch": 0.28195404862815077, "grad_norm": 0.1622602790594101, "learning_rate": 1.9627763635119423e-05, "loss": 0.4974, "step": 1264 }, { "epoch": 0.28217711354004016, "grad_norm": 0.16674260795116425, "learning_rate": 1.9627127238398142e-05, "loss": 0.4923, "step": 1265 }, { "epoch": 0.2824001784519295, "grad_norm": 0.1815263032913208, "learning_rate": 1.9626490308465996e-05, "loss": 0.5048, "step": 1266 }, { "epoch": 0.2826232433638189, "grad_norm": 0.17753565311431885, "learning_rate": 1.9625852845358265e-05, "loss": 0.5326, "step": 1267 }, { "epoch": 0.2828463082757082, "grad_norm": 0.1820516288280487, "learning_rate": 1.9625214849110253e-05, "loss": 0.5289, "step": 1268 }, { "epoch": 0.2830693731875976, "grad_norm": 0.16515739262104034, "learning_rate": 1.9624576319757302e-05, "loss": 0.5159, "step": 1269 }, { "epoch": 0.28329243809948695, "grad_norm": 0.17564083635807037, "learning_rate": 1.9623937257334767e-05, "loss": 0.5052, "step": 1270 }, { "epoch": 0.2835155030113763, "grad_norm": 0.18046805262565613, "learning_rate": 1.9623297661878054e-05, "loss": 0.5349, "step": 1271 }, { "epoch": 0.2837385679232657, "grad_norm": 0.16942784190177917, "learning_rate": 1.9622657533422583e-05, "loss": 0.4924, "step": 1272 }, { "epoch": 0.283961632835155, "grad_norm": 0.17093469202518463, "learning_rate": 1.9622016872003807e-05, "loss": 0.5261, "step": 1273 }, { "epoch": 0.2841846977470444, "grad_norm": 0.17606058716773987, "learning_rate": 1.9621375677657217e-05, "loss": 0.51, "step": 1274 }, { "epoch": 0.28440776265893375, "grad_norm": 0.16897493600845337, "learning_rate": 1.9620733950418316e-05, "loss": 0.5058, "step": 1275 }, { "epoch": 0.2846308275708231, "grad_norm": 0.18536695837974548, "learning_rate": 1.9620091690322654e-05, "loss": 0.512, "step": 1276 }, { "epoch": 0.2848538924827125, "grad_norm": 0.17734606564044952, "learning_rate": 1.96194488974058e-05, "loss": 0.5311, "step": 1277 }, { "epoch": 0.2850769573946018, "grad_norm": 0.18116462230682373, "learning_rate": 1.9618805571703356e-05, "loss": 0.5153, "step": 1278 }, { "epoch": 0.2853000223064912, "grad_norm": 0.17743848264217377, "learning_rate": 1.961816171325096e-05, "loss": 0.5238, "step": 1279 }, { "epoch": 0.28552308721838054, "grad_norm": 0.16778843104839325, "learning_rate": 1.961751732208426e-05, "loss": 0.505, "step": 1280 }, { "epoch": 0.2857461521302699, "grad_norm": 0.17177841067314148, "learning_rate": 1.961687239823896e-05, "loss": 0.5075, "step": 1281 }, { "epoch": 0.2859692170421593, "grad_norm": 0.18460653722286224, "learning_rate": 1.9616226941750775e-05, "loss": 0.5109, "step": 1282 }, { "epoch": 0.2861922819540486, "grad_norm": 0.16801568865776062, "learning_rate": 1.961558095265545e-05, "loss": 0.552, "step": 1283 }, { "epoch": 0.286415346865938, "grad_norm": 0.19550864398479462, "learning_rate": 1.961493443098877e-05, "loss": 0.5138, "step": 1284 }, { "epoch": 0.28663841177782734, "grad_norm": 0.17915892601013184, "learning_rate": 1.9614287376786537e-05, "loss": 0.5267, "step": 1285 }, { "epoch": 0.28686147668971673, "grad_norm": 0.17141470313072205, "learning_rate": 1.9613639790084596e-05, "loss": 0.5398, "step": 1286 }, { "epoch": 0.28708454160160607, "grad_norm": 0.16843633353710175, "learning_rate": 1.9612991670918808e-05, "loss": 0.5224, "step": 1287 }, { "epoch": 0.2873076065134954, "grad_norm": 0.17116384208202362, "learning_rate": 1.9612343019325077e-05, "loss": 0.506, "step": 1288 }, { "epoch": 0.2875306714253848, "grad_norm": 0.19337520003318787, "learning_rate": 1.9611693835339323e-05, "loss": 0.5417, "step": 1289 }, { "epoch": 0.28775373633727414, "grad_norm": 0.1831638365983963, "learning_rate": 1.9611044118997507e-05, "loss": 0.5487, "step": 1290 }, { "epoch": 0.2879768012491635, "grad_norm": 0.1741098165512085, "learning_rate": 1.961039387033561e-05, "loss": 0.5098, "step": 1291 }, { "epoch": 0.28819986616105286, "grad_norm": 0.16796158254146576, "learning_rate": 1.960974308938965e-05, "loss": 0.5105, "step": 1292 }, { "epoch": 0.2884229310729422, "grad_norm": 0.16848038136959076, "learning_rate": 1.9609091776195667e-05, "loss": 0.5106, "step": 1293 }, { "epoch": 0.2886459959848316, "grad_norm": 0.1563376486301422, "learning_rate": 1.960843993078974e-05, "loss": 0.4927, "step": 1294 }, { "epoch": 0.28886906089672093, "grad_norm": 0.16741539537906647, "learning_rate": 1.9607787553207972e-05, "loss": 0.5241, "step": 1295 }, { "epoch": 0.2890921258086103, "grad_norm": 0.16913025081157684, "learning_rate": 1.9607134643486492e-05, "loss": 0.5183, "step": 1296 }, { "epoch": 0.28931519072049966, "grad_norm": 0.17226742208003998, "learning_rate": 1.9606481201661466e-05, "loss": 0.5105, "step": 1297 }, { "epoch": 0.28953825563238905, "grad_norm": 0.17816194891929626, "learning_rate": 1.960582722776908e-05, "loss": 0.5352, "step": 1298 }, { "epoch": 0.2897613205442784, "grad_norm": 0.16423411667346954, "learning_rate": 1.9605172721845564e-05, "loss": 0.5303, "step": 1299 }, { "epoch": 0.2899843854561677, "grad_norm": 0.22330845892429352, "learning_rate": 1.9604517683927156e-05, "loss": 0.5115, "step": 1300 }, { "epoch": 0.2902074503680571, "grad_norm": 0.16385138034820557, "learning_rate": 1.960386211405015e-05, "loss": 0.5375, "step": 1301 }, { "epoch": 0.29043051527994646, "grad_norm": 0.17269232869148254, "learning_rate": 1.960320601225085e-05, "loss": 0.5291, "step": 1302 }, { "epoch": 0.29065358019183585, "grad_norm": 0.1625886857509613, "learning_rate": 1.9602549378565592e-05, "loss": 0.4982, "step": 1303 }, { "epoch": 0.2908766451037252, "grad_norm": 0.17353162169456482, "learning_rate": 1.9601892213030746e-05, "loss": 0.5109, "step": 1304 }, { "epoch": 0.2910997100156145, "grad_norm": 0.16863249242305756, "learning_rate": 1.9601234515682712e-05, "loss": 0.5409, "step": 1305 }, { "epoch": 0.2913227749275039, "grad_norm": 0.17428503930568695, "learning_rate": 1.960057628655792e-05, "loss": 0.5155, "step": 1306 }, { "epoch": 0.29154583983939325, "grad_norm": 0.1825830489397049, "learning_rate": 1.9599917525692816e-05, "loss": 0.5355, "step": 1307 }, { "epoch": 0.29176890475128264, "grad_norm": 0.8213825821876526, "learning_rate": 1.95992582331239e-05, "loss": 0.5366, "step": 1308 }, { "epoch": 0.291991969663172, "grad_norm": 0.16370530426502228, "learning_rate": 1.959859840888768e-05, "loss": 0.5071, "step": 1309 }, { "epoch": 0.2922150345750613, "grad_norm": 0.17610913515090942, "learning_rate": 1.95979380530207e-05, "loss": 0.5346, "step": 1310 }, { "epoch": 0.2924380994869507, "grad_norm": 0.16303309798240662, "learning_rate": 1.959727716555954e-05, "loss": 0.535, "step": 1311 }, { "epoch": 0.29266116439884005, "grad_norm": 0.17250092327594757, "learning_rate": 1.9596615746540798e-05, "loss": 0.5322, "step": 1312 }, { "epoch": 0.29288422931072944, "grad_norm": 0.18915972113609314, "learning_rate": 1.959595379600111e-05, "loss": 0.5293, "step": 1313 }, { "epoch": 0.2931072942226188, "grad_norm": 0.17842963337898254, "learning_rate": 1.9595291313977144e-05, "loss": 0.5197, "step": 1314 }, { "epoch": 0.29333035913450817, "grad_norm": 0.1693105846643448, "learning_rate": 1.959462830050559e-05, "loss": 0.5044, "step": 1315 }, { "epoch": 0.2935534240463975, "grad_norm": 0.23653189837932587, "learning_rate": 1.959396475562316e-05, "loss": 0.5286, "step": 1316 }, { "epoch": 0.29377648895828684, "grad_norm": 0.1704518347978592, "learning_rate": 1.9593300679366622e-05, "loss": 0.5288, "step": 1317 }, { "epoch": 0.29399955387017623, "grad_norm": 0.169080451130867, "learning_rate": 1.9592636071772745e-05, "loss": 0.5336, "step": 1318 }, { "epoch": 0.29422261878206557, "grad_norm": 0.2256423681974411, "learning_rate": 1.959197093287834e-05, "loss": 0.5253, "step": 1319 }, { "epoch": 0.29444568369395496, "grad_norm": 0.1699950098991394, "learning_rate": 1.9591305262720252e-05, "loss": 0.5308, "step": 1320 }, { "epoch": 0.2946687486058443, "grad_norm": 0.17314709722995758, "learning_rate": 1.9590639061335345e-05, "loss": 0.5477, "step": 1321 }, { "epoch": 0.29489181351773364, "grad_norm": 0.16141551733016968, "learning_rate": 1.958997232876052e-05, "loss": 0.5207, "step": 1322 }, { "epoch": 0.29511487842962303, "grad_norm": 0.15994872152805328, "learning_rate": 1.9589305065032705e-05, "loss": 0.5142, "step": 1323 }, { "epoch": 0.29533794334151237, "grad_norm": 0.16060283780097961, "learning_rate": 1.9588637270188852e-05, "loss": 0.4967, "step": 1324 }, { "epoch": 0.29556100825340176, "grad_norm": 0.17949143052101135, "learning_rate": 1.9587968944265955e-05, "loss": 0.5484, "step": 1325 }, { "epoch": 0.2957840731652911, "grad_norm": 0.17476752400398254, "learning_rate": 1.958730008730103e-05, "loss": 0.5466, "step": 1326 }, { "epoch": 0.2960071380771805, "grad_norm": 0.205107182264328, "learning_rate": 1.9586630699331115e-05, "loss": 0.5147, "step": 1327 }, { "epoch": 0.2962302029890698, "grad_norm": 0.17714425921440125, "learning_rate": 1.9585960780393293e-05, "loss": 0.5142, "step": 1328 }, { "epoch": 0.29645326790095916, "grad_norm": 0.17877231538295746, "learning_rate": 1.9585290330524663e-05, "loss": 0.5073, "step": 1329 }, { "epoch": 0.29667633281284855, "grad_norm": 0.16752929985523224, "learning_rate": 1.958461934976236e-05, "loss": 0.5381, "step": 1330 }, { "epoch": 0.2968993977247379, "grad_norm": 0.17550905048847198, "learning_rate": 1.9583947838143553e-05, "loss": 0.5185, "step": 1331 }, { "epoch": 0.2971224626366273, "grad_norm": 0.17885231971740723, "learning_rate": 1.958327579570542e-05, "loss": 0.5279, "step": 1332 }, { "epoch": 0.2973455275485166, "grad_norm": 0.16726696491241455, "learning_rate": 1.95826032224852e-05, "loss": 0.5253, "step": 1333 }, { "epoch": 0.29756859246040596, "grad_norm": 0.16935260593891144, "learning_rate": 1.9581930118520135e-05, "loss": 0.5311, "step": 1334 }, { "epoch": 0.29779165737229535, "grad_norm": 0.22187648713588715, "learning_rate": 1.9581256483847505e-05, "loss": 0.5124, "step": 1335 }, { "epoch": 0.2980147222841847, "grad_norm": 0.17254653573036194, "learning_rate": 1.9580582318504623e-05, "loss": 0.5113, "step": 1336 }, { "epoch": 0.2982377871960741, "grad_norm": 0.1686115860939026, "learning_rate": 1.9579907622528827e-05, "loss": 0.516, "step": 1337 }, { "epoch": 0.2984608521079634, "grad_norm": 0.17233097553253174, "learning_rate": 1.9579232395957492e-05, "loss": 0.5135, "step": 1338 }, { "epoch": 0.29868391701985275, "grad_norm": 0.1767144799232483, "learning_rate": 1.957855663882801e-05, "loss": 0.5113, "step": 1339 }, { "epoch": 0.29890698193174214, "grad_norm": 0.16990788280963898, "learning_rate": 1.9577880351177803e-05, "loss": 0.5213, "step": 1340 }, { "epoch": 0.2991300468436315, "grad_norm": 0.16583338379859924, "learning_rate": 1.957720353304434e-05, "loss": 0.5059, "step": 1341 }, { "epoch": 0.2993531117555209, "grad_norm": 0.17070691287517548, "learning_rate": 1.95765261844651e-05, "loss": 0.5154, "step": 1342 }, { "epoch": 0.2995761766674102, "grad_norm": 0.18274636566638947, "learning_rate": 1.9575848305477606e-05, "loss": 0.5367, "step": 1343 }, { "epoch": 0.2997992415792996, "grad_norm": 0.16276118159294128, "learning_rate": 1.957516989611939e-05, "loss": 0.5156, "step": 1344 }, { "epoch": 0.30002230649118894, "grad_norm": 0.16200782358646393, "learning_rate": 1.9574490956428045e-05, "loss": 0.5201, "step": 1345 }, { "epoch": 0.3002453714030783, "grad_norm": 0.18693317472934723, "learning_rate": 1.9573811486441158e-05, "loss": 0.5181, "step": 1346 }, { "epoch": 0.30046843631496767, "grad_norm": 0.17908775806427002, "learning_rate": 1.9573131486196372e-05, "loss": 0.5012, "step": 1347 }, { "epoch": 0.300691501226857, "grad_norm": 0.16340862214565277, "learning_rate": 1.9572450955731346e-05, "loss": 0.4951, "step": 1348 }, { "epoch": 0.3009145661387464, "grad_norm": 0.16997461020946503, "learning_rate": 1.957176989508377e-05, "loss": 0.5248, "step": 1349 }, { "epoch": 0.30113763105063573, "grad_norm": 0.1896587759256363, "learning_rate": 1.9571088304291376e-05, "loss": 0.5129, "step": 1350 }, { "epoch": 0.30136069596252507, "grad_norm": 0.1624515950679779, "learning_rate": 1.95704061833919e-05, "loss": 0.4872, "step": 1351 }, { "epoch": 0.30158376087441446, "grad_norm": 0.16828244924545288, "learning_rate": 1.956972353242313e-05, "loss": 0.5291, "step": 1352 }, { "epoch": 0.3018068257863038, "grad_norm": 0.17992524802684784, "learning_rate": 1.9569040351422882e-05, "loss": 0.5396, "step": 1353 }, { "epoch": 0.3020298906981932, "grad_norm": 0.17174522578716278, "learning_rate": 1.956835664042898e-05, "loss": 0.523, "step": 1354 }, { "epoch": 0.30225295561008253, "grad_norm": 0.18021540343761444, "learning_rate": 1.9567672399479304e-05, "loss": 0.5091, "step": 1355 }, { "epoch": 0.30247602052197187, "grad_norm": 0.1889013797044754, "learning_rate": 1.9566987628611748e-05, "loss": 0.521, "step": 1356 }, { "epoch": 0.30269908543386126, "grad_norm": 0.1710296869277954, "learning_rate": 1.9566302327864233e-05, "loss": 0.51, "step": 1357 }, { "epoch": 0.3029221503457506, "grad_norm": 0.16925543546676636, "learning_rate": 1.9565616497274725e-05, "loss": 0.5194, "step": 1358 }, { "epoch": 0.30314521525764, "grad_norm": 0.16558070480823517, "learning_rate": 1.95649301368812e-05, "loss": 0.5284, "step": 1359 }, { "epoch": 0.3033682801695293, "grad_norm": 0.1646072119474411, "learning_rate": 1.9564243246721686e-05, "loss": 0.53, "step": 1360 }, { "epoch": 0.3035913450814187, "grad_norm": 0.17670351266860962, "learning_rate": 1.9563555826834214e-05, "loss": 0.527, "step": 1361 }, { "epoch": 0.30381440999330805, "grad_norm": 0.17414048314094543, "learning_rate": 1.9562867877256867e-05, "loss": 0.5318, "step": 1362 }, { "epoch": 0.3040374749051974, "grad_norm": 0.18004649877548218, "learning_rate": 1.956217939802774e-05, "loss": 0.5149, "step": 1363 }, { "epoch": 0.3042605398170868, "grad_norm": 0.16560295224189758, "learning_rate": 1.9561490389184973e-05, "loss": 0.5093, "step": 1364 }, { "epoch": 0.3044836047289761, "grad_norm": 0.1967393010854721, "learning_rate": 1.956080085076672e-05, "loss": 0.499, "step": 1365 }, { "epoch": 0.3047066696408655, "grad_norm": 0.17638468742370605, "learning_rate": 1.956011078281118e-05, "loss": 0.5287, "step": 1366 }, { "epoch": 0.30492973455275485, "grad_norm": 0.18389096856117249, "learning_rate": 1.955942018535657e-05, "loss": 0.5263, "step": 1367 }, { "epoch": 0.3051527994646442, "grad_norm": 0.16795802116394043, "learning_rate": 1.9558729058441135e-05, "loss": 0.4909, "step": 1368 }, { "epoch": 0.3053758643765336, "grad_norm": 0.1886921525001526, "learning_rate": 1.955803740210316e-05, "loss": 0.4929, "step": 1369 }, { "epoch": 0.3055989292884229, "grad_norm": 0.15743811428546906, "learning_rate": 1.9557345216380953e-05, "loss": 0.5252, "step": 1370 }, { "epoch": 0.3058219942003123, "grad_norm": 0.18187056481838226, "learning_rate": 1.955665250131285e-05, "loss": 0.5307, "step": 1371 }, { "epoch": 0.30604505911220165, "grad_norm": 0.17258834838867188, "learning_rate": 1.9555959256937214e-05, "loss": 0.5007, "step": 1372 }, { "epoch": 0.30626812402409104, "grad_norm": 0.16746965050697327, "learning_rate": 1.9555265483292446e-05, "loss": 0.5174, "step": 1373 }, { "epoch": 0.3064911889359804, "grad_norm": 0.191069096326828, "learning_rate": 1.955457118041697e-05, "loss": 0.5462, "step": 1374 }, { "epoch": 0.3067142538478697, "grad_norm": 0.17369060218334198, "learning_rate": 1.9553876348349242e-05, "loss": 0.5175, "step": 1375 }, { "epoch": 0.3069373187597591, "grad_norm": 0.16620078682899475, "learning_rate": 1.9553180987127748e-05, "loss": 0.5298, "step": 1376 }, { "epoch": 0.30716038367164844, "grad_norm": 0.22137194871902466, "learning_rate": 1.9552485096790996e-05, "loss": 0.5135, "step": 1377 }, { "epoch": 0.30738344858353783, "grad_norm": 0.20380550622940063, "learning_rate": 1.9551788677377535e-05, "loss": 0.5304, "step": 1378 }, { "epoch": 0.30760651349542717, "grad_norm": 0.16844195127487183, "learning_rate": 1.955109172892593e-05, "loss": 0.55, "step": 1379 }, { "epoch": 0.3078295784073165, "grad_norm": 0.1768990010023117, "learning_rate": 1.955039425147479e-05, "loss": 0.5254, "step": 1380 }, { "epoch": 0.3080526433192059, "grad_norm": 0.16573172807693481, "learning_rate": 1.954969624506274e-05, "loss": 0.5195, "step": 1381 }, { "epoch": 0.30827570823109524, "grad_norm": 0.16543026268482208, "learning_rate": 1.9548997709728443e-05, "loss": 0.5243, "step": 1382 }, { "epoch": 0.30849877314298463, "grad_norm": 0.17528237402439117, "learning_rate": 1.9548298645510587e-05, "loss": 0.5133, "step": 1383 }, { "epoch": 0.30872183805487396, "grad_norm": 0.16765229403972626, "learning_rate": 1.954759905244789e-05, "loss": 0.5185, "step": 1384 }, { "epoch": 0.3089449029667633, "grad_norm": 0.17179544270038605, "learning_rate": 1.9546898930579102e-05, "loss": 0.5156, "step": 1385 }, { "epoch": 0.3091679678786527, "grad_norm": 0.17194928228855133, "learning_rate": 1.9546198279942997e-05, "loss": 0.5134, "step": 1386 }, { "epoch": 0.30939103279054203, "grad_norm": 0.16621114313602448, "learning_rate": 1.9545497100578382e-05, "loss": 0.5101, "step": 1387 }, { "epoch": 0.3096140977024314, "grad_norm": 0.16408509016036987, "learning_rate": 1.9544795392524096e-05, "loss": 0.515, "step": 1388 }, { "epoch": 0.30983716261432076, "grad_norm": 0.16583041846752167, "learning_rate": 1.9544093155819004e-05, "loss": 0.4974, "step": 1389 }, { "epoch": 0.31006022752621015, "grad_norm": 0.16977885365486145, "learning_rate": 1.9543390390502e-05, "loss": 0.5229, "step": 1390 }, { "epoch": 0.3102832924380995, "grad_norm": 0.1626800149679184, "learning_rate": 1.9542687096611998e-05, "loss": 0.5166, "step": 1391 }, { "epoch": 0.3105063573499888, "grad_norm": 0.17305392026901245, "learning_rate": 1.9541983274187964e-05, "loss": 0.516, "step": 1392 }, { "epoch": 0.3107294222618782, "grad_norm": 0.18632498383522034, "learning_rate": 1.9541278923268872e-05, "loss": 0.4985, "step": 1393 }, { "epoch": 0.31095248717376756, "grad_norm": 0.18472731113433838, "learning_rate": 1.9540574043893738e-05, "loss": 0.5304, "step": 1394 }, { "epoch": 0.31117555208565695, "grad_norm": 0.1734933704137802, "learning_rate": 1.9539868636101602e-05, "loss": 0.5523, "step": 1395 }, { "epoch": 0.3113986169975463, "grad_norm": 0.17965291440486908, "learning_rate": 1.9539162699931534e-05, "loss": 0.532, "step": 1396 }, { "epoch": 0.3116216819094356, "grad_norm": 0.1630621999502182, "learning_rate": 1.9538456235422625e-05, "loss": 0.5351, "step": 1397 }, { "epoch": 0.311844746821325, "grad_norm": 0.17468804121017456, "learning_rate": 1.9537749242614016e-05, "loss": 0.5144, "step": 1398 }, { "epoch": 0.31206781173321435, "grad_norm": 0.17356379330158234, "learning_rate": 1.9537041721544862e-05, "loss": 0.5124, "step": 1399 }, { "epoch": 0.31229087664510374, "grad_norm": 0.1623128354549408, "learning_rate": 1.953633367225434e-05, "loss": 0.5256, "step": 1400 }, { "epoch": 0.3125139415569931, "grad_norm": 0.17314434051513672, "learning_rate": 1.9535625094781677e-05, "loss": 0.5231, "step": 1401 }, { "epoch": 0.3127370064688825, "grad_norm": 0.17369012534618378, "learning_rate": 1.9534915989166115e-05, "loss": 0.5266, "step": 1402 }, { "epoch": 0.3129600713807718, "grad_norm": 0.16464915871620178, "learning_rate": 1.9534206355446927e-05, "loss": 0.4962, "step": 1403 }, { "epoch": 0.31318313629266115, "grad_norm": 0.16123609244823456, "learning_rate": 1.953349619366342e-05, "loss": 0.4932, "step": 1404 }, { "epoch": 0.31340620120455054, "grad_norm": 0.16396626830101013, "learning_rate": 1.9532785503854926e-05, "loss": 0.5103, "step": 1405 }, { "epoch": 0.3136292661164399, "grad_norm": 0.1750822216272354, "learning_rate": 1.9532074286060805e-05, "loss": 0.5108, "step": 1406 }, { "epoch": 0.31385233102832927, "grad_norm": 0.1627611666917801, "learning_rate": 1.953136254032045e-05, "loss": 0.4933, "step": 1407 }, { "epoch": 0.3140753959402186, "grad_norm": 0.17482301592826843, "learning_rate": 1.9530650266673286e-05, "loss": 0.5179, "step": 1408 }, { "epoch": 0.31429846085210794, "grad_norm": 0.17025405168533325, "learning_rate": 1.952993746515876e-05, "loss": 0.4968, "step": 1409 }, { "epoch": 0.31452152576399733, "grad_norm": 0.17483878135681152, "learning_rate": 1.9529224135816348e-05, "loss": 0.5124, "step": 1410 }, { "epoch": 0.31474459067588667, "grad_norm": 0.1743018627166748, "learning_rate": 1.9528510278685568e-05, "loss": 0.5207, "step": 1411 }, { "epoch": 0.31496765558777606, "grad_norm": 0.16575628519058228, "learning_rate": 1.9527795893805947e-05, "loss": 0.4846, "step": 1412 }, { "epoch": 0.3151907204996654, "grad_norm": 0.16664400696754456, "learning_rate": 1.952708098121706e-05, "loss": 0.5097, "step": 1413 }, { "epoch": 0.31541378541155474, "grad_norm": 0.17265918850898743, "learning_rate": 1.9526365540958497e-05, "loss": 0.5002, "step": 1414 }, { "epoch": 0.31563685032344413, "grad_norm": 0.16979828476905823, "learning_rate": 1.952564957306989e-05, "loss": 0.5172, "step": 1415 }, { "epoch": 0.31585991523533347, "grad_norm": 0.17120909690856934, "learning_rate": 1.952493307759089e-05, "loss": 0.5204, "step": 1416 }, { "epoch": 0.31608298014722286, "grad_norm": 0.15589144825935364, "learning_rate": 1.9524216054561186e-05, "loss": 0.5001, "step": 1417 }, { "epoch": 0.3163060450591122, "grad_norm": 0.1638038605451584, "learning_rate": 1.9523498504020486e-05, "loss": 0.495, "step": 1418 }, { "epoch": 0.3165291099710016, "grad_norm": 0.2510809600353241, "learning_rate": 1.952278042600853e-05, "loss": 0.5177, "step": 1419 }, { "epoch": 0.3167521748828909, "grad_norm": 0.1806640475988388, "learning_rate": 1.9522061820565093e-05, "loss": 0.5187, "step": 1420 }, { "epoch": 0.31697523979478026, "grad_norm": 0.16829638183116913, "learning_rate": 1.9521342687729977e-05, "loss": 0.5275, "step": 1421 }, { "epoch": 0.31719830470666965, "grad_norm": 0.18520011007785797, "learning_rate": 1.9520623027543015e-05, "loss": 0.5654, "step": 1422 }, { "epoch": 0.317421369618559, "grad_norm": 0.17533186078071594, "learning_rate": 1.951990284004406e-05, "loss": 0.5102, "step": 1423 }, { "epoch": 0.3176444345304484, "grad_norm": 0.17360709607601166, "learning_rate": 1.9519182125273e-05, "loss": 0.5196, "step": 1424 }, { "epoch": 0.3178674994423377, "grad_norm": 0.17495203018188477, "learning_rate": 1.951846088326976e-05, "loss": 0.5416, "step": 1425 }, { "epoch": 0.31809056435422706, "grad_norm": 0.17361585795879364, "learning_rate": 1.9517739114074282e-05, "loss": 0.53, "step": 1426 }, { "epoch": 0.31831362926611645, "grad_norm": 0.17399337887763977, "learning_rate": 1.9517016817726542e-05, "loss": 0.5207, "step": 1427 }, { "epoch": 0.3185366941780058, "grad_norm": 0.16763678193092346, "learning_rate": 1.9516293994266548e-05, "loss": 0.5059, "step": 1428 }, { "epoch": 0.3187597590898952, "grad_norm": 0.16563831269741058, "learning_rate": 1.951557064373433e-05, "loss": 0.5079, "step": 1429 }, { "epoch": 0.3189828240017845, "grad_norm": 0.1833125650882721, "learning_rate": 1.951484676616996e-05, "loss": 0.5181, "step": 1430 }, { "epoch": 0.31920588891367385, "grad_norm": 0.15749512612819672, "learning_rate": 1.951412236161352e-05, "loss": 0.4967, "step": 1431 }, { "epoch": 0.31942895382556324, "grad_norm": 0.17580753564834595, "learning_rate": 1.9513397430105137e-05, "loss": 0.5238, "step": 1432 }, { "epoch": 0.3196520187374526, "grad_norm": 0.16896769404411316, "learning_rate": 1.9512671971684963e-05, "loss": 0.5218, "step": 1433 }, { "epoch": 0.319875083649342, "grad_norm": 0.1761835813522339, "learning_rate": 1.951194598639318e-05, "loss": 0.5183, "step": 1434 }, { "epoch": 0.3200981485612313, "grad_norm": 0.1777181178331375, "learning_rate": 1.9511219474269992e-05, "loss": 0.5283, "step": 1435 }, { "epoch": 0.3203212134731207, "grad_norm": 0.17135201394557953, "learning_rate": 1.9510492435355647e-05, "loss": 0.5183, "step": 1436 }, { "epoch": 0.32054427838501004, "grad_norm": 0.18420204520225525, "learning_rate": 1.9509764869690407e-05, "loss": 0.5239, "step": 1437 }, { "epoch": 0.3207673432968994, "grad_norm": 0.17093034088611603, "learning_rate": 1.9509036777314568e-05, "loss": 0.508, "step": 1438 }, { "epoch": 0.32099040820878877, "grad_norm": 0.18275004625320435, "learning_rate": 1.9508308158268458e-05, "loss": 0.5257, "step": 1439 }, { "epoch": 0.3212134731206781, "grad_norm": 0.19184084236621857, "learning_rate": 1.950757901259243e-05, "loss": 0.5138, "step": 1440 }, { "epoch": 0.3214365380325675, "grad_norm": 0.19820280373096466, "learning_rate": 1.9506849340326876e-05, "loss": 0.5406, "step": 1441 }, { "epoch": 0.32165960294445683, "grad_norm": 0.1850280463695526, "learning_rate": 1.9506119141512204e-05, "loss": 0.5009, "step": 1442 }, { "epoch": 0.32188266785634617, "grad_norm": 0.17693570256233215, "learning_rate": 1.9505388416188854e-05, "loss": 0.5502, "step": 1443 }, { "epoch": 0.32210573276823556, "grad_norm": 0.16360151767730713, "learning_rate": 1.9504657164397307e-05, "loss": 0.5215, "step": 1444 }, { "epoch": 0.3223287976801249, "grad_norm": 0.1807732880115509, "learning_rate": 1.950392538617806e-05, "loss": 0.5024, "step": 1445 }, { "epoch": 0.3225518625920143, "grad_norm": 0.17271289229393005, "learning_rate": 1.950319308157164e-05, "loss": 0.4997, "step": 1446 }, { "epoch": 0.32277492750390363, "grad_norm": 0.16827355325222015, "learning_rate": 1.950246025061861e-05, "loss": 0.5178, "step": 1447 }, { "epoch": 0.322997992415793, "grad_norm": 0.17736375331878662, "learning_rate": 1.950172689335956e-05, "loss": 0.494, "step": 1448 }, { "epoch": 0.32322105732768236, "grad_norm": 0.16899362206459045, "learning_rate": 1.9500993009835106e-05, "loss": 0.4701, "step": 1449 }, { "epoch": 0.3234441222395717, "grad_norm": 0.1732366383075714, "learning_rate": 1.9500258600085894e-05, "loss": 0.4774, "step": 1450 }, { "epoch": 0.3236671871514611, "grad_norm": 0.16916102170944214, "learning_rate": 1.9499523664152603e-05, "loss": 0.5222, "step": 1451 }, { "epoch": 0.3238902520633504, "grad_norm": 0.18570592999458313, "learning_rate": 1.9498788202075936e-05, "loss": 0.5611, "step": 1452 }, { "epoch": 0.3241133169752398, "grad_norm": 0.1690537929534912, "learning_rate": 1.9498052213896627e-05, "loss": 0.5055, "step": 1453 }, { "epoch": 0.32433638188712915, "grad_norm": 0.17589417099952698, "learning_rate": 1.9497315699655447e-05, "loss": 0.539, "step": 1454 }, { "epoch": 0.3245594467990185, "grad_norm": 0.1848466843366623, "learning_rate": 1.949657865939318e-05, "loss": 0.5337, "step": 1455 }, { "epoch": 0.3247825117109079, "grad_norm": 0.17046886682510376, "learning_rate": 1.949584109315065e-05, "loss": 0.507, "step": 1456 }, { "epoch": 0.3250055766227972, "grad_norm": 0.17708532512187958, "learning_rate": 1.9495103000968708e-05, "loss": 0.5377, "step": 1457 }, { "epoch": 0.3252286415346866, "grad_norm": 0.18529073894023895, "learning_rate": 1.9494364382888236e-05, "loss": 0.5136, "step": 1458 }, { "epoch": 0.32545170644657595, "grad_norm": 0.16627921164035797, "learning_rate": 1.9493625238950143e-05, "loss": 0.5149, "step": 1459 }, { "epoch": 0.3256747713584653, "grad_norm": 0.1823386400938034, "learning_rate": 1.949288556919537e-05, "loss": 0.5422, "step": 1460 }, { "epoch": 0.3258978362703547, "grad_norm": 0.17173448204994202, "learning_rate": 1.949214537366488e-05, "loss": 0.5315, "step": 1461 }, { "epoch": 0.326120901182244, "grad_norm": 0.20931746065616608, "learning_rate": 1.949140465239967e-05, "loss": 0.525, "step": 1462 }, { "epoch": 0.3263439660941334, "grad_norm": 0.3969743549823761, "learning_rate": 1.9490663405440765e-05, "loss": 0.5193, "step": 1463 }, { "epoch": 0.32656703100602275, "grad_norm": 0.17721976339817047, "learning_rate": 1.9489921632829227e-05, "loss": 0.5115, "step": 1464 }, { "epoch": 0.32679009591791214, "grad_norm": 0.16409623622894287, "learning_rate": 1.948917933460613e-05, "loss": 0.5123, "step": 1465 }, { "epoch": 0.3270131608298015, "grad_norm": 0.16853661835193634, "learning_rate": 1.9488436510812594e-05, "loss": 0.5269, "step": 1466 }, { "epoch": 0.3272362257416908, "grad_norm": 0.1875690519809723, "learning_rate": 1.948769316148976e-05, "loss": 0.4727, "step": 1467 }, { "epoch": 0.3274592906535802, "grad_norm": 0.1777825504541397, "learning_rate": 1.9486949286678798e-05, "loss": 0.5095, "step": 1468 }, { "epoch": 0.32768235556546954, "grad_norm": 0.17113567888736725, "learning_rate": 1.948620488642091e-05, "loss": 0.5244, "step": 1469 }, { "epoch": 0.32790542047735893, "grad_norm": 0.16337715089321136, "learning_rate": 1.9485459960757325e-05, "loss": 0.5194, "step": 1470 }, { "epoch": 0.32812848538924827, "grad_norm": 0.1793193519115448, "learning_rate": 1.9484714509729305e-05, "loss": 0.527, "step": 1471 }, { "epoch": 0.3283515503011376, "grad_norm": 0.16597363352775574, "learning_rate": 1.948396853337813e-05, "loss": 0.493, "step": 1472 }, { "epoch": 0.328574615213027, "grad_norm": 0.16836529970169067, "learning_rate": 1.9483222031745118e-05, "loss": 0.4932, "step": 1473 }, { "epoch": 0.32879768012491634, "grad_norm": 0.17120935022830963, "learning_rate": 1.9482475004871622e-05, "loss": 0.5156, "step": 1474 }, { "epoch": 0.32902074503680573, "grad_norm": 0.1692550629377365, "learning_rate": 1.9481727452799013e-05, "loss": 0.5089, "step": 1475 }, { "epoch": 0.32924380994869507, "grad_norm": 0.16897635161876678, "learning_rate": 1.9480979375568694e-05, "loss": 0.5042, "step": 1476 }, { "epoch": 0.32946687486058446, "grad_norm": 0.16120664775371552, "learning_rate": 1.9480230773222102e-05, "loss": 0.5105, "step": 1477 }, { "epoch": 0.3296899397724738, "grad_norm": 0.17156098783016205, "learning_rate": 1.9479481645800694e-05, "loss": 0.5217, "step": 1478 }, { "epoch": 0.32991300468436313, "grad_norm": 0.17012807726860046, "learning_rate": 1.9478731993345965e-05, "loss": 0.4947, "step": 1479 }, { "epoch": 0.3301360695962525, "grad_norm": 0.16123747825622559, "learning_rate": 1.9477981815899435e-05, "loss": 0.5181, "step": 1480 }, { "epoch": 0.33035913450814186, "grad_norm": 0.16029803454875946, "learning_rate": 1.947723111350265e-05, "loss": 0.5089, "step": 1481 }, { "epoch": 0.33058219942003125, "grad_norm": 0.17824719846248627, "learning_rate": 1.9476479886197198e-05, "loss": 0.5245, "step": 1482 }, { "epoch": 0.3308052643319206, "grad_norm": 0.1661938726902008, "learning_rate": 1.9475728134024675e-05, "loss": 0.4971, "step": 1483 }, { "epoch": 0.3310283292438099, "grad_norm": 0.16781049966812134, "learning_rate": 1.9474975857026727e-05, "loss": 0.5187, "step": 1484 }, { "epoch": 0.3312513941556993, "grad_norm": 0.20398850739002228, "learning_rate": 1.9474223055245014e-05, "loss": 0.5087, "step": 1485 }, { "epoch": 0.33147445906758866, "grad_norm": 0.19590437412261963, "learning_rate": 1.9473469728721233e-05, "loss": 0.5193, "step": 1486 }, { "epoch": 0.33169752397947805, "grad_norm": 0.16896989941596985, "learning_rate": 1.947271587749711e-05, "loss": 0.5272, "step": 1487 }, { "epoch": 0.3319205888913674, "grad_norm": 0.16621822118759155, "learning_rate": 1.9471961501614395e-05, "loss": 0.5356, "step": 1488 }, { "epoch": 0.3321436538032567, "grad_norm": 0.16570885479450226, "learning_rate": 1.947120660111487e-05, "loss": 0.5105, "step": 1489 }, { "epoch": 0.3323667187151461, "grad_norm": 0.1804710477590561, "learning_rate": 1.9470451176040343e-05, "loss": 0.5037, "step": 1490 }, { "epoch": 0.33258978362703545, "grad_norm": 0.16383785009384155, "learning_rate": 1.9469695226432667e-05, "loss": 0.4991, "step": 1491 }, { "epoch": 0.33281284853892484, "grad_norm": 0.16517230868339539, "learning_rate": 1.9468938752333698e-05, "loss": 0.4858, "step": 1492 }, { "epoch": 0.3330359134508142, "grad_norm": 0.1879924088716507, "learning_rate": 1.946818175378534e-05, "loss": 0.5003, "step": 1493 }, { "epoch": 0.3332589783627036, "grad_norm": 0.16963818669319153, "learning_rate": 1.9467424230829514e-05, "loss": 0.5081, "step": 1494 }, { "epoch": 0.3334820432745929, "grad_norm": 0.1686849743127823, "learning_rate": 1.946666618350819e-05, "loss": 0.5384, "step": 1495 }, { "epoch": 0.33370510818648225, "grad_norm": 0.18511471152305603, "learning_rate": 1.946590761186334e-05, "loss": 0.5387, "step": 1496 }, { "epoch": 0.33392817309837164, "grad_norm": 0.15684032440185547, "learning_rate": 1.9465148515936986e-05, "loss": 0.5051, "step": 1497 }, { "epoch": 0.334151238010261, "grad_norm": 0.18369467556476593, "learning_rate": 1.9464388895771165e-05, "loss": 0.4973, "step": 1498 }, { "epoch": 0.33437430292215037, "grad_norm": 0.17524507641792297, "learning_rate": 1.9463628751407957e-05, "loss": 0.5204, "step": 1499 }, { "epoch": 0.3345973678340397, "grad_norm": 0.15974605083465576, "learning_rate": 1.946286808288946e-05, "loss": 0.5128, "step": 1500 }, { "epoch": 0.33482043274592904, "grad_norm": 0.1720893234014511, "learning_rate": 1.9462106890257805e-05, "loss": 0.531, "step": 1501 }, { "epoch": 0.33504349765781843, "grad_norm": 0.1723415106534958, "learning_rate": 1.946134517355515e-05, "loss": 0.5344, "step": 1502 }, { "epoch": 0.33526656256970777, "grad_norm": 0.1691267043352127, "learning_rate": 1.9460582932823685e-05, "loss": 0.4693, "step": 1503 }, { "epoch": 0.33548962748159716, "grad_norm": 0.18036657571792603, "learning_rate": 1.945982016810563e-05, "loss": 0.5175, "step": 1504 }, { "epoch": 0.3357126923934865, "grad_norm": 0.16378861665725708, "learning_rate": 1.9459056879443227e-05, "loss": 0.4967, "step": 1505 }, { "epoch": 0.33593575730537584, "grad_norm": 0.16954581439495087, "learning_rate": 1.9458293066878754e-05, "loss": 0.5024, "step": 1506 }, { "epoch": 0.33615882221726523, "grad_norm": 0.16542184352874756, "learning_rate": 1.9457528730454516e-05, "loss": 0.525, "step": 1507 }, { "epoch": 0.33638188712915457, "grad_norm": 0.16997484862804413, "learning_rate": 1.9456763870212853e-05, "loss": 0.5177, "step": 1508 }, { "epoch": 0.33660495204104396, "grad_norm": 0.1820513904094696, "learning_rate": 1.945599848619611e-05, "loss": 0.5173, "step": 1509 }, { "epoch": 0.3368280169529333, "grad_norm": 0.1801905781030655, "learning_rate": 1.94552325784467e-05, "loss": 0.4951, "step": 1510 }, { "epoch": 0.3370510818648227, "grad_norm": 0.16981808841228485, "learning_rate": 1.9454466147007032e-05, "loss": 0.5359, "step": 1511 }, { "epoch": 0.337274146776712, "grad_norm": 0.16826239228248596, "learning_rate": 1.9453699191919557e-05, "loss": 0.4852, "step": 1512 }, { "epoch": 0.33749721168860136, "grad_norm": 0.16905133426189423, "learning_rate": 1.9452931713226752e-05, "loss": 0.502, "step": 1513 }, { "epoch": 0.33772027660049075, "grad_norm": 0.3108493685722351, "learning_rate": 1.945216371097113e-05, "loss": 0.5025, "step": 1514 }, { "epoch": 0.3379433415123801, "grad_norm": 0.16754211485385895, "learning_rate": 1.9451395185195224e-05, "loss": 0.4913, "step": 1515 }, { "epoch": 0.3381664064242695, "grad_norm": 0.16909672319889069, "learning_rate": 1.9450626135941603e-05, "loss": 0.5489, "step": 1516 }, { "epoch": 0.3383894713361588, "grad_norm": 0.17022110521793365, "learning_rate": 1.944985656325286e-05, "loss": 0.5266, "step": 1517 }, { "epoch": 0.33861253624804816, "grad_norm": 0.16641078889369965, "learning_rate": 1.9449086467171615e-05, "loss": 0.4913, "step": 1518 }, { "epoch": 0.33883560115993755, "grad_norm": 0.16541585326194763, "learning_rate": 1.9448315847740527e-05, "loss": 0.5116, "step": 1519 }, { "epoch": 0.3390586660718269, "grad_norm": 0.175320103764534, "learning_rate": 1.9447544705002273e-05, "loss": 0.506, "step": 1520 }, { "epoch": 0.3392817309837163, "grad_norm": 0.18363285064697266, "learning_rate": 1.9446773038999566e-05, "loss": 0.5119, "step": 1521 }, { "epoch": 0.3395047958956056, "grad_norm": 0.17000706493854523, "learning_rate": 1.944600084977515e-05, "loss": 0.5286, "step": 1522 }, { "epoch": 0.339727860807495, "grad_norm": 0.20967601239681244, "learning_rate": 1.9445228137371784e-05, "loss": 0.5366, "step": 1523 }, { "epoch": 0.33995092571938434, "grad_norm": 0.1751982420682907, "learning_rate": 1.9444454901832273e-05, "loss": 0.5298, "step": 1524 }, { "epoch": 0.3401739906312737, "grad_norm": 0.16722087562084198, "learning_rate": 1.944368114319944e-05, "loss": 0.5248, "step": 1525 }, { "epoch": 0.3403970555431631, "grad_norm": 0.16639317572116852, "learning_rate": 1.9442906861516143e-05, "loss": 0.4947, "step": 1526 }, { "epoch": 0.3406201204550524, "grad_norm": 0.18173882365226746, "learning_rate": 1.9442132056825268e-05, "loss": 0.5198, "step": 1527 }, { "epoch": 0.3408431853669418, "grad_norm": 0.16811014711856842, "learning_rate": 1.9441356729169725e-05, "loss": 0.5193, "step": 1528 }, { "epoch": 0.34106625027883114, "grad_norm": 0.18385176360607147, "learning_rate": 1.944058087859246e-05, "loss": 0.5419, "step": 1529 }, { "epoch": 0.3412893151907205, "grad_norm": 0.18335555493831635, "learning_rate": 1.9439804505136437e-05, "loss": 0.5442, "step": 1530 }, { "epoch": 0.34151238010260987, "grad_norm": 0.16866059601306915, "learning_rate": 1.9439027608844665e-05, "loss": 0.4893, "step": 1531 }, { "epoch": 0.3417354450144992, "grad_norm": 0.16770686209201813, "learning_rate": 1.9438250189760168e-05, "loss": 0.5276, "step": 1532 }, { "epoch": 0.3419585099263886, "grad_norm": 0.17907945811748505, "learning_rate": 1.943747224792601e-05, "loss": 0.5221, "step": 1533 }, { "epoch": 0.34218157483827794, "grad_norm": 0.16425661742687225, "learning_rate": 1.9436693783385273e-05, "loss": 0.527, "step": 1534 }, { "epoch": 0.3424046397501673, "grad_norm": 0.20484143495559692, "learning_rate": 1.9435914796181077e-05, "loss": 0.5294, "step": 1535 }, { "epoch": 0.34262770466205666, "grad_norm": 0.171040877699852, "learning_rate": 1.9435135286356563e-05, "loss": 0.5317, "step": 1536 }, { "epoch": 0.342850769573946, "grad_norm": 0.1566307246685028, "learning_rate": 1.943435525395491e-05, "loss": 0.4908, "step": 1537 }, { "epoch": 0.3430738344858354, "grad_norm": 0.15999655425548553, "learning_rate": 1.9433574699019315e-05, "loss": 0.5211, "step": 1538 }, { "epoch": 0.34329689939772473, "grad_norm": 0.16879580914974213, "learning_rate": 1.9432793621593013e-05, "loss": 0.5473, "step": 1539 }, { "epoch": 0.3435199643096141, "grad_norm": 0.16014404594898224, "learning_rate": 1.943201202171927e-05, "loss": 0.5369, "step": 1540 }, { "epoch": 0.34374302922150346, "grad_norm": 0.1585661619901657, "learning_rate": 1.943122989944137e-05, "loss": 0.5203, "step": 1541 }, { "epoch": 0.3439660941333928, "grad_norm": 0.16618762910366058, "learning_rate": 1.943044725480263e-05, "loss": 0.5163, "step": 1542 }, { "epoch": 0.3441891590452822, "grad_norm": 0.1790398806333542, "learning_rate": 1.9429664087846407e-05, "loss": 0.492, "step": 1543 }, { "epoch": 0.3444122239571715, "grad_norm": 0.17689375579357147, "learning_rate": 1.9428880398616065e-05, "loss": 0.5009, "step": 1544 }, { "epoch": 0.3446352888690609, "grad_norm": 0.16719898581504822, "learning_rate": 1.942809618715502e-05, "loss": 0.5064, "step": 1545 }, { "epoch": 0.34485835378095026, "grad_norm": 0.1700020283460617, "learning_rate": 1.9427311453506705e-05, "loss": 0.5229, "step": 1546 }, { "epoch": 0.3450814186928396, "grad_norm": 0.17998361587524414, "learning_rate": 1.9426526197714582e-05, "loss": 0.5188, "step": 1547 }, { "epoch": 0.345304483604729, "grad_norm": 0.17831408977508545, "learning_rate": 1.9425740419822138e-05, "loss": 0.4967, "step": 1548 }, { "epoch": 0.3455275485166183, "grad_norm": 0.16397793591022491, "learning_rate": 1.9424954119872904e-05, "loss": 0.5319, "step": 1549 }, { "epoch": 0.3457506134285077, "grad_norm": 0.17712676525115967, "learning_rate": 1.9424167297910425e-05, "loss": 0.5104, "step": 1550 }, { "epoch": 0.34597367834039705, "grad_norm": 0.19361375272274017, "learning_rate": 1.9423379953978277e-05, "loss": 0.5113, "step": 1551 }, { "epoch": 0.34619674325228644, "grad_norm": 0.1744556725025177, "learning_rate": 1.9422592088120074e-05, "loss": 0.5331, "step": 1552 }, { "epoch": 0.3464198081641758, "grad_norm": 0.1747075766324997, "learning_rate": 1.9421803700379454e-05, "loss": 0.5257, "step": 1553 }, { "epoch": 0.3466428730760651, "grad_norm": 0.17446283996105194, "learning_rate": 1.9421014790800074e-05, "loss": 0.5133, "step": 1554 }, { "epoch": 0.3468659379879545, "grad_norm": 0.1626337766647339, "learning_rate": 1.9420225359425637e-05, "loss": 0.4812, "step": 1555 }, { "epoch": 0.34708900289984385, "grad_norm": 0.16959097981452942, "learning_rate": 1.9419435406299863e-05, "loss": 0.4954, "step": 1556 }, { "epoch": 0.34731206781173324, "grad_norm": 0.17018267512321472, "learning_rate": 1.9418644931466507e-05, "loss": 0.4785, "step": 1557 }, { "epoch": 0.3475351327236226, "grad_norm": 0.18296095728874207, "learning_rate": 1.9417853934969347e-05, "loss": 0.5244, "step": 1558 }, { "epoch": 0.3477581976355119, "grad_norm": 0.1640445590019226, "learning_rate": 1.9417062416852198e-05, "loss": 0.5273, "step": 1559 }, { "epoch": 0.3479812625474013, "grad_norm": 0.1624521166086197, "learning_rate": 1.9416270377158896e-05, "loss": 0.4939, "step": 1560 }, { "epoch": 0.34820432745929064, "grad_norm": 0.16685132682323456, "learning_rate": 1.941547781593331e-05, "loss": 0.5118, "step": 1561 }, { "epoch": 0.34842739237118003, "grad_norm": 0.1717056930065155, "learning_rate": 1.9414684733219334e-05, "loss": 0.5156, "step": 1562 }, { "epoch": 0.34865045728306937, "grad_norm": 0.18991652131080627, "learning_rate": 1.94138911290609e-05, "loss": 0.5346, "step": 1563 }, { "epoch": 0.3488735221949587, "grad_norm": 0.16688503324985504, "learning_rate": 1.941309700350196e-05, "loss": 0.5333, "step": 1564 }, { "epoch": 0.3490965871068481, "grad_norm": 0.17251865565776825, "learning_rate": 1.9412302356586494e-05, "loss": 0.5113, "step": 1565 }, { "epoch": 0.34931965201873744, "grad_norm": 0.15765444934368134, "learning_rate": 1.941150718835852e-05, "loss": 0.5305, "step": 1566 }, { "epoch": 0.34954271693062683, "grad_norm": 0.18435880541801453, "learning_rate": 1.9410711498862077e-05, "loss": 0.5087, "step": 1567 }, { "epoch": 0.34976578184251617, "grad_norm": 0.16191725432872772, "learning_rate": 1.9409915288141235e-05, "loss": 0.5001, "step": 1568 }, { "epoch": 0.34998884675440556, "grad_norm": 0.19742515683174133, "learning_rate": 1.9409118556240095e-05, "loss": 0.5232, "step": 1569 }, { "epoch": 0.3502119116662949, "grad_norm": 0.16742932796478271, "learning_rate": 1.940832130320278e-05, "loss": 0.5227, "step": 1570 }, { "epoch": 0.35043497657818423, "grad_norm": 0.17970915138721466, "learning_rate": 1.9407523529073455e-05, "loss": 0.5228, "step": 1571 }, { "epoch": 0.3506580414900736, "grad_norm": 0.16648298501968384, "learning_rate": 1.9406725233896297e-05, "loss": 0.4987, "step": 1572 }, { "epoch": 0.35088110640196296, "grad_norm": 0.1718263030052185, "learning_rate": 1.940592641771553e-05, "loss": 0.5256, "step": 1573 }, { "epoch": 0.35110417131385235, "grad_norm": 0.17099805176258087, "learning_rate": 1.9405127080575387e-05, "loss": 0.5371, "step": 1574 }, { "epoch": 0.3513272362257417, "grad_norm": 0.16639132797718048, "learning_rate": 1.9404327222520147e-05, "loss": 0.5055, "step": 1575 }, { "epoch": 0.351550301137631, "grad_norm": 0.15915724635124207, "learning_rate": 1.9403526843594115e-05, "loss": 0.5123, "step": 1576 }, { "epoch": 0.3517733660495204, "grad_norm": 0.190689817070961, "learning_rate": 1.9402725943841608e-05, "loss": 0.5149, "step": 1577 }, { "epoch": 0.35199643096140976, "grad_norm": 0.1728559285402298, "learning_rate": 1.9401924523306998e-05, "loss": 0.518, "step": 1578 }, { "epoch": 0.35221949587329915, "grad_norm": 0.15996624529361725, "learning_rate": 1.9401122582034664e-05, "loss": 0.4949, "step": 1579 }, { "epoch": 0.3524425607851885, "grad_norm": 0.17822036147117615, "learning_rate": 1.940032012006903e-05, "loss": 0.5012, "step": 1580 }, { "epoch": 0.3526656256970778, "grad_norm": 0.18252968788146973, "learning_rate": 1.9399517137454534e-05, "loss": 0.54, "step": 1581 }, { "epoch": 0.3528886906089672, "grad_norm": 0.1667163223028183, "learning_rate": 1.939871363423566e-05, "loss": 0.5176, "step": 1582 }, { "epoch": 0.35311175552085655, "grad_norm": 0.17513629794120789, "learning_rate": 1.9397909610456897e-05, "loss": 0.5182, "step": 1583 }, { "epoch": 0.35333482043274594, "grad_norm": 0.16966688632965088, "learning_rate": 1.939710506616279e-05, "loss": 0.5154, "step": 1584 }, { "epoch": 0.3535578853446353, "grad_norm": 0.17325864732265472, "learning_rate": 1.9396300001397888e-05, "loss": 0.5321, "step": 1585 }, { "epoch": 0.3537809502565247, "grad_norm": 0.17322127521038055, "learning_rate": 1.939549441620679e-05, "loss": 0.5206, "step": 1586 }, { "epoch": 0.354004015168414, "grad_norm": 0.16276715695858002, "learning_rate": 1.9394688310634114e-05, "loss": 0.4874, "step": 1587 }, { "epoch": 0.35422708008030335, "grad_norm": 0.1607387810945511, "learning_rate": 1.93938816847245e-05, "loss": 0.4813, "step": 1588 }, { "epoch": 0.35445014499219274, "grad_norm": 0.16621682047843933, "learning_rate": 1.939307453852263e-05, "loss": 0.533, "step": 1589 }, { "epoch": 0.3546732099040821, "grad_norm": 0.165096253156662, "learning_rate": 1.9392266872073207e-05, "loss": 0.5115, "step": 1590 }, { "epoch": 0.35489627481597147, "grad_norm": 0.17987790703773499, "learning_rate": 1.9391458685420966e-05, "loss": 0.5439, "step": 1591 }, { "epoch": 0.3551193397278608, "grad_norm": 0.1641106754541397, "learning_rate": 1.939064997861067e-05, "loss": 0.462, "step": 1592 }, { "epoch": 0.35534240463975014, "grad_norm": 0.23700258135795593, "learning_rate": 1.9389840751687105e-05, "loss": 0.4945, "step": 1593 }, { "epoch": 0.35556546955163953, "grad_norm": 0.1738569736480713, "learning_rate": 1.9389031004695095e-05, "loss": 0.5247, "step": 1594 }, { "epoch": 0.35578853446352887, "grad_norm": 0.16523152589797974, "learning_rate": 1.9388220737679493e-05, "loss": 0.5231, "step": 1595 }, { "epoch": 0.35601159937541826, "grad_norm": 0.1748376339673996, "learning_rate": 1.9387409950685167e-05, "loss": 0.5048, "step": 1596 }, { "epoch": 0.3562346642873076, "grad_norm": 0.20013664662837982, "learning_rate": 1.938659864375703e-05, "loss": 0.5284, "step": 1597 }, { "epoch": 0.356457729199197, "grad_norm": 0.18472377955913544, "learning_rate": 1.938578681694002e-05, "loss": 0.5063, "step": 1598 }, { "epoch": 0.35668079411108633, "grad_norm": 0.17718833684921265, "learning_rate": 1.9384974470279093e-05, "loss": 0.5156, "step": 1599 }, { "epoch": 0.35690385902297567, "grad_norm": 0.16550204157829285, "learning_rate": 1.938416160381925e-05, "loss": 0.487, "step": 1600 }, { "epoch": 0.35712692393486506, "grad_norm": 0.17269675433635712, "learning_rate": 1.93833482176055e-05, "loss": 0.4989, "step": 1601 }, { "epoch": 0.3573499888467544, "grad_norm": 0.17201650142669678, "learning_rate": 1.938253431168291e-05, "loss": 0.5075, "step": 1602 }, { "epoch": 0.3575730537586438, "grad_norm": 0.17147138714790344, "learning_rate": 1.938171988609655e-05, "loss": 0.4967, "step": 1603 }, { "epoch": 0.3577961186705331, "grad_norm": 0.20010094344615936, "learning_rate": 1.938090494089153e-05, "loss": 0.5335, "step": 1604 }, { "epoch": 0.35801918358242246, "grad_norm": 0.19643662869930267, "learning_rate": 1.9380089476112985e-05, "loss": 0.586, "step": 1605 }, { "epoch": 0.35824224849431185, "grad_norm": 0.16737660765647888, "learning_rate": 1.937927349180608e-05, "loss": 0.5203, "step": 1606 }, { "epoch": 0.3584653134062012, "grad_norm": 0.1626511663198471, "learning_rate": 1.9378456988016015e-05, "loss": 0.5057, "step": 1607 }, { "epoch": 0.3586883783180906, "grad_norm": 0.18112361431121826, "learning_rate": 1.9377639964788005e-05, "loss": 0.5709, "step": 1608 }, { "epoch": 0.3589114432299799, "grad_norm": 0.1701660305261612, "learning_rate": 1.937682242216731e-05, "loss": 0.5427, "step": 1609 }, { "epoch": 0.35913450814186926, "grad_norm": 0.16982769966125488, "learning_rate": 1.9376004360199202e-05, "loss": 0.5414, "step": 1610 }, { "epoch": 0.35935757305375865, "grad_norm": 0.1677013635635376, "learning_rate": 1.9375185778928997e-05, "loss": 0.5189, "step": 1611 }, { "epoch": 0.359580637965648, "grad_norm": 0.16802898049354553, "learning_rate": 1.9374366678402032e-05, "loss": 0.5359, "step": 1612 }, { "epoch": 0.3598037028775374, "grad_norm": 0.1629040390253067, "learning_rate": 1.9373547058663674e-05, "loss": 0.5239, "step": 1613 }, { "epoch": 0.3600267677894267, "grad_norm": 0.15942612290382385, "learning_rate": 1.9372726919759318e-05, "loss": 0.4927, "step": 1614 }, { "epoch": 0.3602498327013161, "grad_norm": 0.16228771209716797, "learning_rate": 1.9371906261734387e-05, "loss": 0.4795, "step": 1615 }, { "epoch": 0.36047289761320545, "grad_norm": 0.18663759529590607, "learning_rate": 1.9371085084634337e-05, "loss": 0.4523, "step": 1616 }, { "epoch": 0.3606959625250948, "grad_norm": 0.1616244614124298, "learning_rate": 1.9370263388504647e-05, "loss": 0.5221, "step": 1617 }, { "epoch": 0.3609190274369842, "grad_norm": 0.16261187195777893, "learning_rate": 1.936944117339083e-05, "loss": 0.507, "step": 1618 }, { "epoch": 0.3611420923488735, "grad_norm": 0.16541439294815063, "learning_rate": 1.9368618439338424e-05, "loss": 0.5318, "step": 1619 }, { "epoch": 0.3613651572607629, "grad_norm": 0.17135834693908691, "learning_rate": 1.9367795186392996e-05, "loss": 0.5071, "step": 1620 }, { "epoch": 0.36158822217265224, "grad_norm": 0.1599961221218109, "learning_rate": 1.936697141460015e-05, "loss": 0.5031, "step": 1621 }, { "epoch": 0.3618112870845416, "grad_norm": 0.17239384353160858, "learning_rate": 1.9366147124005504e-05, "loss": 0.5138, "step": 1622 }, { "epoch": 0.36203435199643097, "grad_norm": 0.1636582612991333, "learning_rate": 1.9365322314654714e-05, "loss": 0.5197, "step": 1623 }, { "epoch": 0.3622574169083203, "grad_norm": 0.17068350315093994, "learning_rate": 1.9364496986593463e-05, "loss": 0.4886, "step": 1624 }, { "epoch": 0.3624804818202097, "grad_norm": 0.17033697664737701, "learning_rate": 1.9363671139867467e-05, "loss": 0.5232, "step": 1625 }, { "epoch": 0.36270354673209904, "grad_norm": 0.16417935490608215, "learning_rate": 1.936284477452246e-05, "loss": 0.5384, "step": 1626 }, { "epoch": 0.36292661164398843, "grad_norm": 0.1705051213502884, "learning_rate": 1.9362017890604215e-05, "loss": 0.517, "step": 1627 }, { "epoch": 0.36314967655587777, "grad_norm": 0.17601191997528076, "learning_rate": 1.9361190488158535e-05, "loss": 0.5141, "step": 1628 }, { "epoch": 0.3633727414677671, "grad_norm": 0.173202782869339, "learning_rate": 1.936036256723124e-05, "loss": 0.5133, "step": 1629 }, { "epoch": 0.3635958063796565, "grad_norm": 0.1655133217573166, "learning_rate": 1.935953412786818e-05, "loss": 0.5278, "step": 1630 }, { "epoch": 0.36381887129154583, "grad_norm": 0.1649170219898224, "learning_rate": 1.9358705170115253e-05, "loss": 0.5101, "step": 1631 }, { "epoch": 0.3640419362034352, "grad_norm": 0.4769546389579773, "learning_rate": 1.9357875694018364e-05, "loss": 0.5325, "step": 1632 }, { "epoch": 0.36426500111532456, "grad_norm": 0.16675199568271637, "learning_rate": 1.9357045699623452e-05, "loss": 0.5373, "step": 1633 }, { "epoch": 0.3644880660272139, "grad_norm": 0.18537083268165588, "learning_rate": 1.9356215186976496e-05, "loss": 0.5077, "step": 1634 }, { "epoch": 0.3647111309391033, "grad_norm": 0.1646578013896942, "learning_rate": 1.935538415612349e-05, "loss": 0.5163, "step": 1635 }, { "epoch": 0.3649341958509926, "grad_norm": 0.17558851838111877, "learning_rate": 1.935455260711046e-05, "loss": 0.5269, "step": 1636 }, { "epoch": 0.365157260762882, "grad_norm": 0.1750616431236267, "learning_rate": 1.9353720539983462e-05, "loss": 0.5185, "step": 1637 }, { "epoch": 0.36538032567477136, "grad_norm": 0.1686229407787323, "learning_rate": 1.9352887954788583e-05, "loss": 0.5033, "step": 1638 }, { "epoch": 0.3656033905866607, "grad_norm": 0.17237605154514313, "learning_rate": 1.935205485157194e-05, "loss": 0.5071, "step": 1639 }, { "epoch": 0.3658264554985501, "grad_norm": 0.17209577560424805, "learning_rate": 1.9351221230379673e-05, "loss": 0.5049, "step": 1640 }, { "epoch": 0.3660495204104394, "grad_norm": 0.1662733256816864, "learning_rate": 1.9350387091257952e-05, "loss": 0.5399, "step": 1641 }, { "epoch": 0.3662725853223288, "grad_norm": 0.17135699093341827, "learning_rate": 1.9349552434252976e-05, "loss": 0.5175, "step": 1642 }, { "epoch": 0.36649565023421815, "grad_norm": 0.17738810181617737, "learning_rate": 1.9348717259410975e-05, "loss": 0.5464, "step": 1643 }, { "epoch": 0.36671871514610754, "grad_norm": 0.17101503908634186, "learning_rate": 1.9347881566778208e-05, "loss": 0.5162, "step": 1644 }, { "epoch": 0.3669417800579969, "grad_norm": 0.16841377317905426, "learning_rate": 1.934704535640096e-05, "loss": 0.5244, "step": 1645 }, { "epoch": 0.3671648449698862, "grad_norm": 0.17288081347942352, "learning_rate": 1.9346208628325543e-05, "loss": 0.5487, "step": 1646 }, { "epoch": 0.3673879098817756, "grad_norm": 0.1731618493795395, "learning_rate": 1.93453713825983e-05, "loss": 0.4924, "step": 1647 }, { "epoch": 0.36761097479366495, "grad_norm": 0.18122443556785583, "learning_rate": 1.934453361926561e-05, "loss": 0.5105, "step": 1648 }, { "epoch": 0.36783403970555434, "grad_norm": 0.17434357106685638, "learning_rate": 1.9343695338373866e-05, "loss": 0.5185, "step": 1649 }, { "epoch": 0.3680571046174437, "grad_norm": 0.19195139408111572, "learning_rate": 1.93428565399695e-05, "loss": 0.5074, "step": 1650 }, { "epoch": 0.368280169529333, "grad_norm": 0.17529140412807465, "learning_rate": 1.9342017224098974e-05, "loss": 0.5355, "step": 1651 }, { "epoch": 0.3685032344412224, "grad_norm": 0.16739797592163086, "learning_rate": 1.9341177390808768e-05, "loss": 0.5128, "step": 1652 }, { "epoch": 0.36872629935311174, "grad_norm": 0.17157401144504547, "learning_rate": 1.9340337040145397e-05, "loss": 0.5117, "step": 1653 }, { "epoch": 0.36894936426500113, "grad_norm": 0.166087806224823, "learning_rate": 1.933949617215541e-05, "loss": 0.5182, "step": 1654 }, { "epoch": 0.36917242917689047, "grad_norm": 0.17426824569702148, "learning_rate": 1.9338654786885377e-05, "loss": 0.5141, "step": 1655 }, { "epoch": 0.3693954940887798, "grad_norm": 0.18022432923316956, "learning_rate": 1.93378128843819e-05, "loss": 0.5097, "step": 1656 }, { "epoch": 0.3696185590006692, "grad_norm": 0.1715417206287384, "learning_rate": 1.933697046469161e-05, "loss": 0.525, "step": 1657 }, { "epoch": 0.36984162391255854, "grad_norm": 0.17939765751361847, "learning_rate": 1.9336127527861158e-05, "loss": 0.5166, "step": 1658 }, { "epoch": 0.37006468882444793, "grad_norm": 0.1856374740600586, "learning_rate": 1.9335284073937242e-05, "loss": 0.5078, "step": 1659 }, { "epoch": 0.37028775373633727, "grad_norm": 0.16677772998809814, "learning_rate": 1.9334440102966567e-05, "loss": 0.5427, "step": 1660 }, { "epoch": 0.37051081864822666, "grad_norm": 0.17052651941776276, "learning_rate": 1.933359561499589e-05, "loss": 0.4976, "step": 1661 }, { "epoch": 0.370733883560116, "grad_norm": 0.180454283952713, "learning_rate": 1.9332750610071972e-05, "loss": 0.4807, "step": 1662 }, { "epoch": 0.37095694847200533, "grad_norm": 0.16270607709884644, "learning_rate": 1.9331905088241623e-05, "loss": 0.5079, "step": 1663 }, { "epoch": 0.3711800133838947, "grad_norm": 0.1687788963317871, "learning_rate": 1.9331059049551668e-05, "loss": 0.5208, "step": 1664 }, { "epoch": 0.37140307829578406, "grad_norm": 0.16753818094730377, "learning_rate": 1.933021249404897e-05, "loss": 0.4991, "step": 1665 }, { "epoch": 0.37162614320767345, "grad_norm": 0.1763564944267273, "learning_rate": 1.9329365421780414e-05, "loss": 0.5321, "step": 1666 }, { "epoch": 0.3718492081195628, "grad_norm": 0.1733461618423462, "learning_rate": 1.932851783279292e-05, "loss": 0.5331, "step": 1667 }, { "epoch": 0.3720722730314521, "grad_norm": 0.1711835116147995, "learning_rate": 1.9327669727133424e-05, "loss": 0.4764, "step": 1668 }, { "epoch": 0.3722953379433415, "grad_norm": 0.190887913107872, "learning_rate": 1.932682110484891e-05, "loss": 0.5254, "step": 1669 }, { "epoch": 0.37251840285523086, "grad_norm": 0.16770336031913757, "learning_rate": 1.9325971965986373e-05, "loss": 0.4989, "step": 1670 }, { "epoch": 0.37274146776712025, "grad_norm": 0.2075570970773697, "learning_rate": 1.9325122310592846e-05, "loss": 0.515, "step": 1671 }, { "epoch": 0.3729645326790096, "grad_norm": 0.17692680656909943, "learning_rate": 1.9324272138715388e-05, "loss": 0.5442, "step": 1672 }, { "epoch": 0.373187597590899, "grad_norm": 0.17124991118907928, "learning_rate": 1.932342145040109e-05, "loss": 0.5066, "step": 1673 }, { "epoch": 0.3734106625027883, "grad_norm": 0.16621904075145721, "learning_rate": 1.932257024569706e-05, "loss": 0.5066, "step": 1674 }, { "epoch": 0.37363372741467765, "grad_norm": 0.17721469700336456, "learning_rate": 1.932171852465045e-05, "loss": 0.4789, "step": 1675 }, { "epoch": 0.37385679232656704, "grad_norm": 0.16972602903842926, "learning_rate": 1.9320866287308433e-05, "loss": 0.4922, "step": 1676 }, { "epoch": 0.3740798572384564, "grad_norm": 0.17205440998077393, "learning_rate": 1.9320013533718208e-05, "loss": 0.4909, "step": 1677 }, { "epoch": 0.3743029221503458, "grad_norm": 0.16761687397956848, "learning_rate": 1.9319160263927013e-05, "loss": 0.513, "step": 1678 }, { "epoch": 0.3745259870622351, "grad_norm": 0.16278418898582458, "learning_rate": 1.93183064779821e-05, "loss": 0.508, "step": 1679 }, { "epoch": 0.37474905197412445, "grad_norm": 0.17359983921051025, "learning_rate": 1.931745217593076e-05, "loss": 0.5472, "step": 1680 }, { "epoch": 0.37497211688601384, "grad_norm": 0.16624867916107178, "learning_rate": 1.931659735782031e-05, "loss": 0.53, "step": 1681 }, { "epoch": 0.3751951817979032, "grad_norm": 0.16128243505954742, "learning_rate": 1.9315742023698095e-05, "loss": 0.474, "step": 1682 }, { "epoch": 0.37541824670979257, "grad_norm": 0.19452917575836182, "learning_rate": 1.9314886173611487e-05, "loss": 0.5527, "step": 1683 }, { "epoch": 0.3756413116216819, "grad_norm": 0.18000726401805878, "learning_rate": 1.931402980760789e-05, "loss": 0.5185, "step": 1684 }, { "epoch": 0.37586437653357124, "grad_norm": 0.1593390852212906, "learning_rate": 1.9313172925734736e-05, "loss": 0.503, "step": 1685 }, { "epoch": 0.37608744144546064, "grad_norm": 0.1637982428073883, "learning_rate": 1.931231552803948e-05, "loss": 0.5011, "step": 1686 }, { "epoch": 0.37631050635734997, "grad_norm": 0.18159234523773193, "learning_rate": 1.931145761456962e-05, "loss": 0.501, "step": 1687 }, { "epoch": 0.37653357126923936, "grad_norm": 0.17507293820381165, "learning_rate": 1.9310599185372657e-05, "loss": 0.5219, "step": 1688 }, { "epoch": 0.3767566361811287, "grad_norm": 0.16377007961273193, "learning_rate": 1.9309740240496152e-05, "loss": 0.5096, "step": 1689 }, { "epoch": 0.3769797010930181, "grad_norm": 0.171824112534523, "learning_rate": 1.930888077998767e-05, "loss": 0.5366, "step": 1690 }, { "epoch": 0.37720276600490743, "grad_norm": 0.18775586783885956, "learning_rate": 1.9308020803894813e-05, "loss": 0.5018, "step": 1691 }, { "epoch": 0.37742583091679677, "grad_norm": 0.1640700101852417, "learning_rate": 1.9307160312265216e-05, "loss": 0.5159, "step": 1692 }, { "epoch": 0.37764889582868616, "grad_norm": 0.1758948117494583, "learning_rate": 1.9306299305146535e-05, "loss": 0.5171, "step": 1693 }, { "epoch": 0.3778719607405755, "grad_norm": 0.1625639647245407, "learning_rate": 1.9305437782586463e-05, "loss": 0.4989, "step": 1694 }, { "epoch": 0.3780950256524649, "grad_norm": 0.15426993370056152, "learning_rate": 1.9304575744632708e-05, "loss": 0.4781, "step": 1695 }, { "epoch": 0.3783180905643542, "grad_norm": 0.18176917731761932, "learning_rate": 1.9303713191333025e-05, "loss": 0.5307, "step": 1696 }, { "epoch": 0.37854115547624356, "grad_norm": 0.16359803080558777, "learning_rate": 1.930285012273518e-05, "loss": 0.5067, "step": 1697 }, { "epoch": 0.37876422038813296, "grad_norm": 0.16793343424797058, "learning_rate": 1.930198653888698e-05, "loss": 0.5234, "step": 1698 }, { "epoch": 0.3789872853000223, "grad_norm": 0.17032268643379211, "learning_rate": 1.930112243983625e-05, "loss": 0.5521, "step": 1699 }, { "epoch": 0.3792103502119117, "grad_norm": 0.18838275969028473, "learning_rate": 1.930025782563086e-05, "loss": 0.5203, "step": 1700 }, { "epoch": 0.379433415123801, "grad_norm": 0.1813000738620758, "learning_rate": 1.9299392696318683e-05, "loss": 0.5299, "step": 1701 }, { "epoch": 0.3796564800356904, "grad_norm": 0.15768998861312866, "learning_rate": 1.9298527051947645e-05, "loss": 0.4802, "step": 1702 }, { "epoch": 0.37987954494757975, "grad_norm": 0.15893371403217316, "learning_rate": 1.9297660892565692e-05, "loss": 0.4939, "step": 1703 }, { "epoch": 0.3801026098594691, "grad_norm": 0.16448427736759186, "learning_rate": 1.929679421822079e-05, "loss": 0.503, "step": 1704 }, { "epoch": 0.3803256747713585, "grad_norm": 0.1799025982618332, "learning_rate": 1.9295927028960947e-05, "loss": 0.5146, "step": 1705 }, { "epoch": 0.3805487396832478, "grad_norm": 0.17270612716674805, "learning_rate": 1.9295059324834193e-05, "loss": 0.5472, "step": 1706 }, { "epoch": 0.3807718045951372, "grad_norm": 0.1640714704990387, "learning_rate": 1.9294191105888586e-05, "loss": 0.4847, "step": 1707 }, { "epoch": 0.38099486950702655, "grad_norm": 0.16522546112537384, "learning_rate": 1.9293322372172207e-05, "loss": 0.4752, "step": 1708 }, { "epoch": 0.3812179344189159, "grad_norm": 0.17741802334785461, "learning_rate": 1.9292453123733184e-05, "loss": 0.5246, "step": 1709 }, { "epoch": 0.3814409993308053, "grad_norm": 0.3004036545753479, "learning_rate": 1.9291583360619653e-05, "loss": 0.5032, "step": 1710 }, { "epoch": 0.3816640642426946, "grad_norm": 0.17262016236782074, "learning_rate": 1.9290713082879786e-05, "loss": 0.5208, "step": 1711 }, { "epoch": 0.381887129154584, "grad_norm": 0.1745826005935669, "learning_rate": 1.928984229056179e-05, "loss": 0.5245, "step": 1712 }, { "epoch": 0.38211019406647334, "grad_norm": 0.19315177202224731, "learning_rate": 1.9288970983713893e-05, "loss": 0.5029, "step": 1713 }, { "epoch": 0.3823332589783627, "grad_norm": 0.17079593241214752, "learning_rate": 1.9288099162384354e-05, "loss": 0.5204, "step": 1714 }, { "epoch": 0.38255632389025207, "grad_norm": 0.17363616824150085, "learning_rate": 1.9287226826621457e-05, "loss": 0.5195, "step": 1715 }, { "epoch": 0.3827793888021414, "grad_norm": 0.1668912172317505, "learning_rate": 1.928635397647352e-05, "loss": 0.5259, "step": 1716 }, { "epoch": 0.3830024537140308, "grad_norm": 0.162687286734581, "learning_rate": 1.9285480611988886e-05, "loss": 0.5135, "step": 1717 }, { "epoch": 0.38322551862592014, "grad_norm": 0.1736724078655243, "learning_rate": 1.9284606733215925e-05, "loss": 0.4855, "step": 1718 }, { "epoch": 0.38344858353780953, "grad_norm": 0.16885711252689362, "learning_rate": 1.9283732340203045e-05, "loss": 0.4934, "step": 1719 }, { "epoch": 0.38367164844969887, "grad_norm": 0.17074720561504364, "learning_rate": 1.928285743299867e-05, "loss": 0.5167, "step": 1720 }, { "epoch": 0.3838947133615882, "grad_norm": 0.17049984633922577, "learning_rate": 1.9281982011651257e-05, "loss": 0.498, "step": 1721 }, { "epoch": 0.3841177782734776, "grad_norm": 0.21770413219928741, "learning_rate": 1.9281106076209296e-05, "loss": 0.4869, "step": 1722 }, { "epoch": 0.38434084318536693, "grad_norm": 0.17190971970558167, "learning_rate": 1.9280229626721302e-05, "loss": 0.5476, "step": 1723 }, { "epoch": 0.3845639080972563, "grad_norm": 0.20358806848526, "learning_rate": 1.9279352663235813e-05, "loss": 0.5074, "step": 1724 }, { "epoch": 0.38478697300914566, "grad_norm": 0.19548457860946655, "learning_rate": 1.9278475185801404e-05, "loss": 0.5246, "step": 1725 }, { "epoch": 0.385010037921035, "grad_norm": 0.18699929118156433, "learning_rate": 1.9277597194466674e-05, "loss": 0.5267, "step": 1726 }, { "epoch": 0.3852331028329244, "grad_norm": 0.16484257578849792, "learning_rate": 1.9276718689280258e-05, "loss": 0.5108, "step": 1727 }, { "epoch": 0.3854561677448137, "grad_norm": 0.16136226058006287, "learning_rate": 1.9275839670290804e-05, "loss": 0.4973, "step": 1728 }, { "epoch": 0.3856792326567031, "grad_norm": 0.16811032593250275, "learning_rate": 1.9274960137547002e-05, "loss": 0.4971, "step": 1729 }, { "epoch": 0.38590229756859246, "grad_norm": 0.17258323729038239, "learning_rate": 1.9274080091097568e-05, "loss": 0.5513, "step": 1730 }, { "epoch": 0.3861253624804818, "grad_norm": 0.16782884299755096, "learning_rate": 1.927319953099124e-05, "loss": 0.5032, "step": 1731 }, { "epoch": 0.3863484273923712, "grad_norm": 0.1599043756723404, "learning_rate": 1.9272318457276792e-05, "loss": 0.5117, "step": 1732 }, { "epoch": 0.3865714923042605, "grad_norm": 0.18118049204349518, "learning_rate": 1.9271436870003022e-05, "loss": 0.5476, "step": 1733 }, { "epoch": 0.3867945572161499, "grad_norm": 0.16125431656837463, "learning_rate": 1.927055476921876e-05, "loss": 0.496, "step": 1734 }, { "epoch": 0.38701762212803925, "grad_norm": 0.17001016438007355, "learning_rate": 1.9269672154972863e-05, "loss": 0.5274, "step": 1735 }, { "epoch": 0.38724068703992864, "grad_norm": 0.18982788920402527, "learning_rate": 1.9268789027314208e-05, "loss": 0.5423, "step": 1736 }, { "epoch": 0.387463751951818, "grad_norm": 0.20157238841056824, "learning_rate": 1.9267905386291716e-05, "loss": 0.5204, "step": 1737 }, { "epoch": 0.3876868168637073, "grad_norm": 0.16580817103385925, "learning_rate": 1.926702123195433e-05, "loss": 0.523, "step": 1738 }, { "epoch": 0.3879098817755967, "grad_norm": 0.1722583770751953, "learning_rate": 1.926613656435101e-05, "loss": 0.4689, "step": 1739 }, { "epoch": 0.38813294668748605, "grad_norm": 0.16248932480812073, "learning_rate": 1.9265251383530765e-05, "loss": 0.4838, "step": 1740 }, { "epoch": 0.38835601159937544, "grad_norm": 0.16789276897907257, "learning_rate": 1.9264365689542616e-05, "loss": 0.5191, "step": 1741 }, { "epoch": 0.3885790765112648, "grad_norm": 0.16488412022590637, "learning_rate": 1.926347948243562e-05, "loss": 0.5124, "step": 1742 }, { "epoch": 0.3888021414231541, "grad_norm": 0.17527227103710175, "learning_rate": 1.926259276225886e-05, "loss": 0.5042, "step": 1743 }, { "epoch": 0.3890252063350435, "grad_norm": 0.16100718080997467, "learning_rate": 1.926170552906145e-05, "loss": 0.5103, "step": 1744 }, { "epoch": 0.38924827124693284, "grad_norm": 0.1704183667898178, "learning_rate": 1.926081778289253e-05, "loss": 0.5252, "step": 1745 }, { "epoch": 0.38947133615882223, "grad_norm": 0.15948736667633057, "learning_rate": 1.9259929523801266e-05, "loss": 0.4972, "step": 1746 }, { "epoch": 0.38969440107071157, "grad_norm": 0.15783725678920746, "learning_rate": 1.9259040751836858e-05, "loss": 0.4862, "step": 1747 }, { "epoch": 0.38991746598260096, "grad_norm": 0.22207237780094147, "learning_rate": 1.9258151467048533e-05, "loss": 0.5058, "step": 1748 }, { "epoch": 0.3901405308944903, "grad_norm": 0.18270133435726166, "learning_rate": 1.9257261669485544e-05, "loss": 0.4872, "step": 1749 }, { "epoch": 0.39036359580637964, "grad_norm": 0.15779659152030945, "learning_rate": 1.925637135919717e-05, "loss": 0.4972, "step": 1750 }, { "epoch": 0.39058666071826903, "grad_norm": 0.16197752952575684, "learning_rate": 1.9255480536232728e-05, "loss": 0.5001, "step": 1751 }, { "epoch": 0.39080972563015837, "grad_norm": 0.1598835587501526, "learning_rate": 1.9254589200641556e-05, "loss": 0.4991, "step": 1752 }, { "epoch": 0.39103279054204776, "grad_norm": 0.1754835844039917, "learning_rate": 1.925369735247302e-05, "loss": 0.4858, "step": 1753 }, { "epoch": 0.3912558554539371, "grad_norm": 0.17721553146839142, "learning_rate": 1.9252804991776513e-05, "loss": 0.4954, "step": 1754 }, { "epoch": 0.39147892036582643, "grad_norm": 0.16600117087364197, "learning_rate": 1.9251912118601466e-05, "loss": 0.5006, "step": 1755 }, { "epoch": 0.3917019852777158, "grad_norm": 0.18240424990653992, "learning_rate": 1.925101873299733e-05, "loss": 0.5563, "step": 1756 }, { "epoch": 0.39192505018960516, "grad_norm": 0.2154020071029663, "learning_rate": 1.9250124835013583e-05, "loss": 0.5213, "step": 1757 }, { "epoch": 0.39214811510149455, "grad_norm": 0.17148853838443756, "learning_rate": 1.9249230424699735e-05, "loss": 0.5378, "step": 1758 }, { "epoch": 0.3923711800133839, "grad_norm": 0.17164389789104462, "learning_rate": 1.9248335502105328e-05, "loss": 0.5272, "step": 1759 }, { "epoch": 0.39259424492527323, "grad_norm": 0.16211992502212524, "learning_rate": 1.924744006727993e-05, "loss": 0.5543, "step": 1760 }, { "epoch": 0.3928173098371626, "grad_norm": 0.17015716433525085, "learning_rate": 1.924654412027313e-05, "loss": 0.5281, "step": 1761 }, { "epoch": 0.39304037474905196, "grad_norm": 0.15979216992855072, "learning_rate": 1.924564766113455e-05, "loss": 0.4938, "step": 1762 }, { "epoch": 0.39326343966094135, "grad_norm": 0.16782401502132416, "learning_rate": 1.924475068991385e-05, "loss": 0.4876, "step": 1763 }, { "epoch": 0.3934865045728307, "grad_norm": 0.1688111275434494, "learning_rate": 1.9243853206660703e-05, "loss": 0.503, "step": 1764 }, { "epoch": 0.3937095694847201, "grad_norm": 0.16509543359279633, "learning_rate": 1.924295521142482e-05, "loss": 0.4955, "step": 1765 }, { "epoch": 0.3939326343966094, "grad_norm": 0.17914824187755585, "learning_rate": 1.9242056704255935e-05, "loss": 0.5334, "step": 1766 }, { "epoch": 0.39415569930849875, "grad_norm": 0.18346786499023438, "learning_rate": 1.9241157685203817e-05, "loss": 0.5135, "step": 1767 }, { "epoch": 0.39437876422038814, "grad_norm": 0.18024152517318726, "learning_rate": 1.9240258154318257e-05, "loss": 0.5284, "step": 1768 }, { "epoch": 0.3946018291322775, "grad_norm": 0.17404595017433167, "learning_rate": 1.923935811164908e-05, "loss": 0.5069, "step": 1769 }, { "epoch": 0.3948248940441669, "grad_norm": 0.15942560136318207, "learning_rate": 1.9238457557246128e-05, "loss": 0.5034, "step": 1770 }, { "epoch": 0.3950479589560562, "grad_norm": 0.15909984707832336, "learning_rate": 1.9237556491159285e-05, "loss": 0.4762, "step": 1771 }, { "epoch": 0.39527102386794555, "grad_norm": 0.16344551742076874, "learning_rate": 1.9236654913438456e-05, "loss": 0.5243, "step": 1772 }, { "epoch": 0.39549408877983494, "grad_norm": 0.1672784835100174, "learning_rate": 1.923575282413358e-05, "loss": 0.5057, "step": 1773 }, { "epoch": 0.3957171536917243, "grad_norm": 0.1585850566625595, "learning_rate": 1.9234850223294613e-05, "loss": 0.5074, "step": 1774 }, { "epoch": 0.39594021860361367, "grad_norm": 0.17053529620170593, "learning_rate": 1.9233947110971556e-05, "loss": 0.5556, "step": 1775 }, { "epoch": 0.396163283515503, "grad_norm": 0.16313889622688293, "learning_rate": 1.9233043487214423e-05, "loss": 0.491, "step": 1776 }, { "epoch": 0.3963863484273924, "grad_norm": 0.15941911935806274, "learning_rate": 1.9232139352073265e-05, "loss": 0.4862, "step": 1777 }, { "epoch": 0.39660941333928174, "grad_norm": 0.1699916571378708, "learning_rate": 1.9231234705598153e-05, "loss": 0.542, "step": 1778 }, { "epoch": 0.3968324782511711, "grad_norm": 0.17430098354816437, "learning_rate": 1.9230329547839196e-05, "loss": 0.5006, "step": 1779 }, { "epoch": 0.39705554316306046, "grad_norm": 0.16042235493659973, "learning_rate": 1.9229423878846535e-05, "loss": 0.5087, "step": 1780 }, { "epoch": 0.3972786080749498, "grad_norm": 0.1637001633644104, "learning_rate": 1.9228517698670316e-05, "loss": 0.4966, "step": 1781 }, { "epoch": 0.3975016729868392, "grad_norm": 0.1627058982849121, "learning_rate": 1.922761100736074e-05, "loss": 0.4882, "step": 1782 }, { "epoch": 0.39772473789872853, "grad_norm": 0.1667163074016571, "learning_rate": 1.9226703804968022e-05, "loss": 0.5295, "step": 1783 }, { "epoch": 0.39794780281061787, "grad_norm": 0.16532334685325623, "learning_rate": 1.9225796091542412e-05, "loss": 0.5062, "step": 1784 }, { "epoch": 0.39817086772250726, "grad_norm": 0.16788703203201294, "learning_rate": 1.9224887867134178e-05, "loss": 0.5276, "step": 1785 }, { "epoch": 0.3983939326343966, "grad_norm": 0.17480318248271942, "learning_rate": 1.9223979131793627e-05, "loss": 0.52, "step": 1786 }, { "epoch": 0.398616997546286, "grad_norm": 0.1601472944021225, "learning_rate": 1.9223069885571094e-05, "loss": 0.5263, "step": 1787 }, { "epoch": 0.3988400624581753, "grad_norm": 0.16812476515769958, "learning_rate": 1.9222160128516932e-05, "loss": 0.4831, "step": 1788 }, { "epoch": 0.39906312737006466, "grad_norm": 0.1809302121400833, "learning_rate": 1.9221249860681537e-05, "loss": 0.4944, "step": 1789 }, { "epoch": 0.39928619228195406, "grad_norm": 0.16555048525333405, "learning_rate": 1.9220339082115317e-05, "loss": 0.4851, "step": 1790 }, { "epoch": 0.3995092571938434, "grad_norm": 0.16044898331165314, "learning_rate": 1.9219427792868722e-05, "loss": 0.5377, "step": 1791 }, { "epoch": 0.3997323221057328, "grad_norm": 0.16331616044044495, "learning_rate": 1.921851599299222e-05, "loss": 0.4978, "step": 1792 }, { "epoch": 0.3999553870176221, "grad_norm": 0.16315311193466187, "learning_rate": 1.9217603682536315e-05, "loss": 0.51, "step": 1793 }, { "epoch": 0.4001784519295115, "grad_norm": 0.16615985333919525, "learning_rate": 1.9216690861551544e-05, "loss": 0.5343, "step": 1794 }, { "epoch": 0.40040151684140085, "grad_norm": 0.1581142395734787, "learning_rate": 1.9215777530088452e-05, "loss": 0.5276, "step": 1795 }, { "epoch": 0.4006245817532902, "grad_norm": 0.25778672099113464, "learning_rate": 1.9214863688197634e-05, "loss": 0.5265, "step": 1796 }, { "epoch": 0.4008476466651796, "grad_norm": 0.21218818426132202, "learning_rate": 1.92139493359297e-05, "loss": 0.4929, "step": 1797 }, { "epoch": 0.4010707115770689, "grad_norm": 0.1664579063653946, "learning_rate": 1.9213034473335293e-05, "loss": 0.5298, "step": 1798 }, { "epoch": 0.4012937764889583, "grad_norm": 0.158608078956604, "learning_rate": 1.9212119100465084e-05, "loss": 0.513, "step": 1799 }, { "epoch": 0.40151684140084765, "grad_norm": 0.15767832100391388, "learning_rate": 1.9211203217369774e-05, "loss": 0.5037, "step": 1800 }, { "epoch": 0.401739906312737, "grad_norm": 0.1640487164258957, "learning_rate": 1.921028682410009e-05, "loss": 0.5102, "step": 1801 }, { "epoch": 0.4019629712246264, "grad_norm": 0.1980050951242447, "learning_rate": 1.9209369920706783e-05, "loss": 0.4926, "step": 1802 }, { "epoch": 0.4021860361365157, "grad_norm": 0.16443881392478943, "learning_rate": 1.9208452507240642e-05, "loss": 0.5165, "step": 1803 }, { "epoch": 0.4024091010484051, "grad_norm": 0.17739985883235931, "learning_rate": 1.920753458375248e-05, "loss": 0.5141, "step": 1804 }, { "epoch": 0.40263216596029444, "grad_norm": 0.1755398064851761, "learning_rate": 1.9206616150293132e-05, "loss": 0.5279, "step": 1805 }, { "epoch": 0.4028552308721838, "grad_norm": 0.16583852469921112, "learning_rate": 1.9205697206913473e-05, "loss": 0.4838, "step": 1806 }, { "epoch": 0.40307829578407317, "grad_norm": 0.1888515055179596, "learning_rate": 1.9204777753664397e-05, "loss": 0.5381, "step": 1807 }, { "epoch": 0.4033013606959625, "grad_norm": 0.17295043170452118, "learning_rate": 1.9203857790596826e-05, "loss": 0.5062, "step": 1808 }, { "epoch": 0.4035244256078519, "grad_norm": 0.16970674693584442, "learning_rate": 1.9202937317761713e-05, "loss": 0.5138, "step": 1809 }, { "epoch": 0.40374749051974124, "grad_norm": 0.16952745616436005, "learning_rate": 1.9202016335210047e-05, "loss": 0.4829, "step": 1810 }, { "epoch": 0.40397055543163063, "grad_norm": 0.16743586957454681, "learning_rate": 1.9201094842992832e-05, "loss": 0.5085, "step": 1811 }, { "epoch": 0.40419362034351997, "grad_norm": 0.16140130162239075, "learning_rate": 1.9200172841161108e-05, "loss": 0.4983, "step": 1812 }, { "epoch": 0.4044166852554093, "grad_norm": 0.1676797717809677, "learning_rate": 1.9199250329765943e-05, "loss": 0.5055, "step": 1813 }, { "epoch": 0.4046397501672987, "grad_norm": 0.1718011051416397, "learning_rate": 1.9198327308858427e-05, "loss": 0.5021, "step": 1814 }, { "epoch": 0.40486281507918803, "grad_norm": 0.1699821949005127, "learning_rate": 1.9197403778489684e-05, "loss": 0.5312, "step": 1815 }, { "epoch": 0.4050858799910774, "grad_norm": 0.16701120138168335, "learning_rate": 1.9196479738710865e-05, "loss": 0.5028, "step": 1816 }, { "epoch": 0.40530894490296676, "grad_norm": 0.17370650172233582, "learning_rate": 1.9195555189573153e-05, "loss": 0.5075, "step": 1817 }, { "epoch": 0.4055320098148561, "grad_norm": 0.16744117438793182, "learning_rate": 1.919463013112775e-05, "loss": 0.4972, "step": 1818 }, { "epoch": 0.4057550747267455, "grad_norm": 0.1703552007675171, "learning_rate": 1.9193704563425896e-05, "loss": 0.5215, "step": 1819 }, { "epoch": 0.4059781396386348, "grad_norm": 0.1808227002620697, "learning_rate": 1.919277848651885e-05, "loss": 0.5048, "step": 1820 }, { "epoch": 0.4062012045505242, "grad_norm": 0.17191959917545319, "learning_rate": 1.9191851900457905e-05, "loss": 0.5293, "step": 1821 }, { "epoch": 0.40642426946241356, "grad_norm": 0.16464729607105255, "learning_rate": 1.9190924805294388e-05, "loss": 0.531, "step": 1822 }, { "epoch": 0.40664733437430295, "grad_norm": 0.17217296361923218, "learning_rate": 1.9189997201079638e-05, "loss": 0.5221, "step": 1823 }, { "epoch": 0.4068703992861923, "grad_norm": 0.15952859818935394, "learning_rate": 1.918906908786504e-05, "loss": 0.4898, "step": 1824 }, { "epoch": 0.4070934641980816, "grad_norm": 0.15873339772224426, "learning_rate": 1.9188140465701987e-05, "loss": 0.4771, "step": 1825 }, { "epoch": 0.407316529109971, "grad_norm": 0.16631244122982025, "learning_rate": 1.9187211334641923e-05, "loss": 0.5216, "step": 1826 }, { "epoch": 0.40753959402186035, "grad_norm": 0.19430842995643616, "learning_rate": 1.918628169473631e-05, "loss": 0.532, "step": 1827 }, { "epoch": 0.40776265893374974, "grad_norm": 0.1647733449935913, "learning_rate": 1.9185351546036625e-05, "loss": 0.4991, "step": 1828 }, { "epoch": 0.4079857238456391, "grad_norm": 0.17242863774299622, "learning_rate": 1.9184420888594398e-05, "loss": 0.5095, "step": 1829 }, { "epoch": 0.4082087887575284, "grad_norm": 0.15754511952400208, "learning_rate": 1.9183489722461167e-05, "loss": 0.5185, "step": 1830 }, { "epoch": 0.4084318536694178, "grad_norm": 0.16371309757232666, "learning_rate": 1.918255804768851e-05, "loss": 0.5146, "step": 1831 }, { "epoch": 0.40865491858130715, "grad_norm": 0.17599989473819733, "learning_rate": 1.918162586432803e-05, "loss": 0.5509, "step": 1832 }, { "epoch": 0.40887798349319654, "grad_norm": 0.17950280010700226, "learning_rate": 1.9180693172431353e-05, "loss": 0.5236, "step": 1833 }, { "epoch": 0.4091010484050859, "grad_norm": 0.1845085769891739, "learning_rate": 1.917975997205014e-05, "loss": 0.5155, "step": 1834 }, { "epoch": 0.4093241133169752, "grad_norm": 0.16801810264587402, "learning_rate": 1.9178826263236076e-05, "loss": 0.5265, "step": 1835 }, { "epoch": 0.4095471782288646, "grad_norm": 0.16314861178398132, "learning_rate": 1.9177892046040875e-05, "loss": 0.4922, "step": 1836 }, { "epoch": 0.40977024314075394, "grad_norm": 0.16442064940929413, "learning_rate": 1.9176957320516287e-05, "loss": 0.5004, "step": 1837 }, { "epoch": 0.40999330805264333, "grad_norm": 0.16156339645385742, "learning_rate": 1.917602208671407e-05, "loss": 0.5172, "step": 1838 }, { "epoch": 0.41021637296453267, "grad_norm": 0.17251324653625488, "learning_rate": 1.9175086344686035e-05, "loss": 0.5432, "step": 1839 }, { "epoch": 0.41043943787642206, "grad_norm": 0.1891004890203476, "learning_rate": 1.9174150094484e-05, "loss": 0.5276, "step": 1840 }, { "epoch": 0.4106625027883114, "grad_norm": 0.1750495731830597, "learning_rate": 1.917321333615983e-05, "loss": 0.548, "step": 1841 }, { "epoch": 0.41088556770020074, "grad_norm": 0.1698615849018097, "learning_rate": 1.91722760697654e-05, "loss": 0.5276, "step": 1842 }, { "epoch": 0.41110863261209013, "grad_norm": 0.16396887600421906, "learning_rate": 1.917133829535263e-05, "loss": 0.5211, "step": 1843 }, { "epoch": 0.41133169752397947, "grad_norm": 0.18432539701461792, "learning_rate": 1.917040001297345e-05, "loss": 0.5276, "step": 1844 }, { "epoch": 0.41155476243586886, "grad_norm": 0.17017246782779694, "learning_rate": 1.9169461222679836e-05, "loss": 0.5312, "step": 1845 }, { "epoch": 0.4117778273477582, "grad_norm": 0.16993820667266846, "learning_rate": 1.9168521924523782e-05, "loss": 0.4981, "step": 1846 }, { "epoch": 0.41200089225964753, "grad_norm": 0.1594426929950714, "learning_rate": 1.916758211855731e-05, "loss": 0.5162, "step": 1847 }, { "epoch": 0.4122239571715369, "grad_norm": 0.16228000819683075, "learning_rate": 1.9166641804832474e-05, "loss": 0.5086, "step": 1848 }, { "epoch": 0.41244702208342626, "grad_norm": 0.2065616101026535, "learning_rate": 1.9165700983401354e-05, "loss": 0.4916, "step": 1849 }, { "epoch": 0.41267008699531565, "grad_norm": 0.16846513748168945, "learning_rate": 1.916475965431606e-05, "loss": 0.5246, "step": 1850 }, { "epoch": 0.412893151907205, "grad_norm": 0.19557945430278778, "learning_rate": 1.9163817817628728e-05, "loss": 0.5208, "step": 1851 }, { "epoch": 0.4131162168190944, "grad_norm": 0.17081156373023987, "learning_rate": 1.916287547339152e-05, "loss": 0.5534, "step": 1852 }, { "epoch": 0.4133392817309837, "grad_norm": 0.16580162942409515, "learning_rate": 1.9161932621656634e-05, "loss": 0.5152, "step": 1853 }, { "epoch": 0.41356234664287306, "grad_norm": 0.18416714668273926, "learning_rate": 1.9160989262476288e-05, "loss": 0.5064, "step": 1854 }, { "epoch": 0.41378541155476245, "grad_norm": 0.16161711513996124, "learning_rate": 1.916004539590273e-05, "loss": 0.5008, "step": 1855 }, { "epoch": 0.4140084764666518, "grad_norm": 0.17786547541618347, "learning_rate": 1.9159101021988244e-05, "loss": 0.5168, "step": 1856 }, { "epoch": 0.4142315413785412, "grad_norm": 0.15692083537578583, "learning_rate": 1.9158156140785125e-05, "loss": 0.5026, "step": 1857 }, { "epoch": 0.4144546062904305, "grad_norm": 0.22050227224826813, "learning_rate": 1.9157210752345713e-05, "loss": 0.4936, "step": 1858 }, { "epoch": 0.41467767120231985, "grad_norm": 0.20180164277553558, "learning_rate": 1.915626485672237e-05, "loss": 0.5347, "step": 1859 }, { "epoch": 0.41490073611420925, "grad_norm": 0.156357541680336, "learning_rate": 1.9155318453967483e-05, "loss": 0.4816, "step": 1860 }, { "epoch": 0.4151238010260986, "grad_norm": 0.16347983479499817, "learning_rate": 1.9154371544133472e-05, "loss": 0.5032, "step": 1861 }, { "epoch": 0.415346865937988, "grad_norm": 0.16571176052093506, "learning_rate": 1.9153424127272783e-05, "loss": 0.4875, "step": 1862 }, { "epoch": 0.4155699308498773, "grad_norm": 0.16103731095790863, "learning_rate": 1.9152476203437884e-05, "loss": 0.5266, "step": 1863 }, { "epoch": 0.41579299576176665, "grad_norm": 0.17397384345531464, "learning_rate": 1.915152777268128e-05, "loss": 0.5114, "step": 1864 }, { "epoch": 0.41601606067365604, "grad_norm": 0.16579222679138184, "learning_rate": 1.9150578835055507e-05, "loss": 0.5219, "step": 1865 }, { "epoch": 0.4162391255855454, "grad_norm": 0.1724756360054016, "learning_rate": 1.914962939061312e-05, "loss": 0.4942, "step": 1866 }, { "epoch": 0.41646219049743477, "grad_norm": 0.16709351539611816, "learning_rate": 1.9148679439406704e-05, "loss": 0.514, "step": 1867 }, { "epoch": 0.4166852554093241, "grad_norm": 0.15628811717033386, "learning_rate": 1.914772898148887e-05, "loss": 0.5153, "step": 1868 }, { "epoch": 0.4169083203212135, "grad_norm": 0.16881687939167023, "learning_rate": 1.914677801691226e-05, "loss": 0.4924, "step": 1869 }, { "epoch": 0.41713138523310284, "grad_norm": 0.16054266691207886, "learning_rate": 1.9145826545729555e-05, "loss": 0.5238, "step": 1870 }, { "epoch": 0.4173544501449922, "grad_norm": 0.1636224240064621, "learning_rate": 1.9144874567993446e-05, "loss": 0.4813, "step": 1871 }, { "epoch": 0.41757751505688157, "grad_norm": 0.16189956665039062, "learning_rate": 1.9143922083756656e-05, "loss": 0.5211, "step": 1872 }, { "epoch": 0.4178005799687709, "grad_norm": 0.16065020859241486, "learning_rate": 1.9142969093071944e-05, "loss": 0.5116, "step": 1873 }, { "epoch": 0.4180236448806603, "grad_norm": 0.17001986503601074, "learning_rate": 1.9142015595992096e-05, "loss": 0.5202, "step": 1874 }, { "epoch": 0.41824670979254963, "grad_norm": 0.16956806182861328, "learning_rate": 1.9141061592569913e-05, "loss": 0.4941, "step": 1875 }, { "epoch": 0.41846977470443897, "grad_norm": 0.16168902814388275, "learning_rate": 1.9140107082858243e-05, "loss": 0.5134, "step": 1876 }, { "epoch": 0.41869283961632836, "grad_norm": 0.16588400304317474, "learning_rate": 1.9139152066909948e-05, "loss": 0.4899, "step": 1877 }, { "epoch": 0.4189159045282177, "grad_norm": 0.16289415955543518, "learning_rate": 1.9138196544777925e-05, "loss": 0.482, "step": 1878 }, { "epoch": 0.4191389694401071, "grad_norm": 0.17892742156982422, "learning_rate": 1.9137240516515094e-05, "loss": 0.5178, "step": 1879 }, { "epoch": 0.4193620343519964, "grad_norm": 0.17155860364437103, "learning_rate": 1.913628398217441e-05, "loss": 0.5099, "step": 1880 }, { "epoch": 0.41958509926388576, "grad_norm": 0.1801706701517105, "learning_rate": 1.913532694180885e-05, "loss": 0.4966, "step": 1881 }, { "epoch": 0.41980816417577516, "grad_norm": 0.16115552186965942, "learning_rate": 1.9134369395471416e-05, "loss": 0.4839, "step": 1882 }, { "epoch": 0.4200312290876645, "grad_norm": 0.16582123935222626, "learning_rate": 1.913341134321515e-05, "loss": 0.4719, "step": 1883 }, { "epoch": 0.4202542939995539, "grad_norm": 0.16009938716888428, "learning_rate": 1.9132452785093113e-05, "loss": 0.5096, "step": 1884 }, { "epoch": 0.4204773589114432, "grad_norm": 0.17301099002361298, "learning_rate": 1.9131493721158395e-05, "loss": 0.5295, "step": 1885 }, { "epoch": 0.4207004238233326, "grad_norm": 0.16522350907325745, "learning_rate": 1.9130534151464116e-05, "loss": 0.5386, "step": 1886 }, { "epoch": 0.42092348873522195, "grad_norm": 0.1624733954668045, "learning_rate": 1.9129574076063423e-05, "loss": 0.5226, "step": 1887 }, { "epoch": 0.4211465536471113, "grad_norm": 0.16734281182289124, "learning_rate": 1.9128613495009487e-05, "loss": 0.5367, "step": 1888 }, { "epoch": 0.4213696185590007, "grad_norm": 0.15870822966098785, "learning_rate": 1.912765240835552e-05, "loss": 0.4597, "step": 1889 }, { "epoch": 0.42159268347089, "grad_norm": 0.16024811565876007, "learning_rate": 1.912669081615474e-05, "loss": 0.4794, "step": 1890 }, { "epoch": 0.4218157483827794, "grad_norm": 0.16861672699451447, "learning_rate": 1.912572871846042e-05, "loss": 0.4918, "step": 1891 }, { "epoch": 0.42203881329466875, "grad_norm": 0.16772149503231049, "learning_rate": 1.9124766115325837e-05, "loss": 0.5262, "step": 1892 }, { "epoch": 0.4222618782065581, "grad_norm": 0.16915294528007507, "learning_rate": 1.912380300680431e-05, "loss": 0.4926, "step": 1893 }, { "epoch": 0.4224849431184475, "grad_norm": 0.16086746752262115, "learning_rate": 1.912283939294918e-05, "loss": 0.5091, "step": 1894 }, { "epoch": 0.4227080080303368, "grad_norm": 0.18025851249694824, "learning_rate": 1.912187527381382e-05, "loss": 0.5147, "step": 1895 }, { "epoch": 0.4229310729422262, "grad_norm": 0.15781170129776, "learning_rate": 1.9120910649451632e-05, "loss": 0.501, "step": 1896 }, { "epoch": 0.42315413785411554, "grad_norm": 0.15790554881095886, "learning_rate": 1.9119945519916036e-05, "loss": 0.4963, "step": 1897 }, { "epoch": 0.42337720276600493, "grad_norm": 0.16987471282482147, "learning_rate": 1.9118979885260493e-05, "loss": 0.5148, "step": 1898 }, { "epoch": 0.42360026767789427, "grad_norm": 0.16438226401805878, "learning_rate": 1.9118013745538483e-05, "loss": 0.5348, "step": 1899 }, { "epoch": 0.4238233325897836, "grad_norm": 0.18299153447151184, "learning_rate": 1.9117047100803513e-05, "loss": 0.5055, "step": 1900 }, { "epoch": 0.424046397501673, "grad_norm": 0.18117006123065948, "learning_rate": 1.911607995110913e-05, "loss": 0.517, "step": 1901 }, { "epoch": 0.42426946241356234, "grad_norm": 0.19496211409568787, "learning_rate": 1.9115112296508896e-05, "loss": 0.5216, "step": 1902 }, { "epoch": 0.42449252732545173, "grad_norm": 0.17908519506454468, "learning_rate": 1.9114144137056406e-05, "loss": 0.5386, "step": 1903 }, { "epoch": 0.42471559223734107, "grad_norm": 0.18684661388397217, "learning_rate": 1.9113175472805284e-05, "loss": 0.5341, "step": 1904 }, { "epoch": 0.4249386571492304, "grad_norm": 0.1662161499261856, "learning_rate": 1.9112206303809183e-05, "loss": 0.4824, "step": 1905 }, { "epoch": 0.4251617220611198, "grad_norm": 0.1687028408050537, "learning_rate": 1.9111236630121775e-05, "loss": 0.5026, "step": 1906 }, { "epoch": 0.42538478697300913, "grad_norm": 0.20177949965000153, "learning_rate": 1.9110266451796772e-05, "loss": 0.5112, "step": 1907 }, { "epoch": 0.4256078518848985, "grad_norm": 0.16266430914402008, "learning_rate": 1.9109295768887907e-05, "loss": 0.5011, "step": 1908 }, { "epoch": 0.42583091679678786, "grad_norm": 0.1639285683631897, "learning_rate": 1.910832458144894e-05, "loss": 0.5147, "step": 1909 }, { "epoch": 0.4260539817086772, "grad_norm": 0.16959701478481293, "learning_rate": 1.9107352889533667e-05, "loss": 0.513, "step": 1910 }, { "epoch": 0.4262770466205666, "grad_norm": 0.16716282069683075, "learning_rate": 1.9106380693195903e-05, "loss": 0.5023, "step": 1911 }, { "epoch": 0.42650011153245593, "grad_norm": 0.16967961192131042, "learning_rate": 1.9105407992489495e-05, "loss": 0.5185, "step": 1912 }, { "epoch": 0.4267231764443453, "grad_norm": 0.16723939776420593, "learning_rate": 1.9104434787468316e-05, "loss": 0.5013, "step": 1913 }, { "epoch": 0.42694624135623466, "grad_norm": 0.16064821183681488, "learning_rate": 1.9103461078186268e-05, "loss": 0.5109, "step": 1914 }, { "epoch": 0.42716930626812405, "grad_norm": 0.17482173442840576, "learning_rate": 1.9102486864697285e-05, "loss": 0.5148, "step": 1915 }, { "epoch": 0.4273923711800134, "grad_norm": 0.1638958603143692, "learning_rate": 1.910151214705532e-05, "loss": 0.527, "step": 1916 }, { "epoch": 0.4276154360919027, "grad_norm": 0.18999075889587402, "learning_rate": 1.9100536925314363e-05, "loss": 0.5, "step": 1917 }, { "epoch": 0.4278385010037921, "grad_norm": 0.18148307502269745, "learning_rate": 1.9099561199528425e-05, "loss": 0.4862, "step": 1918 }, { "epoch": 0.42806156591568145, "grad_norm": 0.17788200080394745, "learning_rate": 1.909858496975155e-05, "loss": 0.4991, "step": 1919 }, { "epoch": 0.42828463082757084, "grad_norm": 0.1629716157913208, "learning_rate": 1.9097608236037813e-05, "loss": 0.5292, "step": 1920 }, { "epoch": 0.4285076957394602, "grad_norm": 0.1686851680278778, "learning_rate": 1.9096630998441298e-05, "loss": 0.5156, "step": 1921 }, { "epoch": 0.4287307606513495, "grad_norm": 0.1803978681564331, "learning_rate": 1.909565325701614e-05, "loss": 0.5193, "step": 1922 }, { "epoch": 0.4289538255632389, "grad_norm": 0.17601296305656433, "learning_rate": 1.9094675011816496e-05, "loss": 0.5193, "step": 1923 }, { "epoch": 0.42917689047512825, "grad_norm": 0.17354577779769897, "learning_rate": 1.9093696262896535e-05, "loss": 0.5055, "step": 1924 }, { "epoch": 0.42939995538701764, "grad_norm": 0.1659156084060669, "learning_rate": 1.9092717010310476e-05, "loss": 0.5232, "step": 1925 }, { "epoch": 0.429623020298907, "grad_norm": 0.1605677753686905, "learning_rate": 1.909173725411255e-05, "loss": 0.4916, "step": 1926 }, { "epoch": 0.42984608521079637, "grad_norm": 0.17374806106090546, "learning_rate": 1.9090756994357035e-05, "loss": 0.5259, "step": 1927 }, { "epoch": 0.4300691501226857, "grad_norm": 0.16589613258838654, "learning_rate": 1.9089776231098204e-05, "loss": 0.5192, "step": 1928 }, { "epoch": 0.43029221503457504, "grad_norm": 0.16348575055599213, "learning_rate": 1.9088794964390395e-05, "loss": 0.4963, "step": 1929 }, { "epoch": 0.43051527994646444, "grad_norm": 0.16494914889335632, "learning_rate": 1.9087813194287948e-05, "loss": 0.5359, "step": 1930 }, { "epoch": 0.43073834485835377, "grad_norm": 0.16134381294250488, "learning_rate": 1.9086830920845242e-05, "loss": 0.5087, "step": 1931 }, { "epoch": 0.43096140977024316, "grad_norm": 0.16159029304981232, "learning_rate": 1.908584814411668e-05, "loss": 0.5242, "step": 1932 }, { "epoch": 0.4311844746821325, "grad_norm": 0.1648871749639511, "learning_rate": 1.9084864864156696e-05, "loss": 0.5069, "step": 1933 }, { "epoch": 0.43140753959402184, "grad_norm": 0.17623218894004822, "learning_rate": 1.9083881081019752e-05, "loss": 0.5049, "step": 1934 }, { "epoch": 0.43163060450591123, "grad_norm": 0.166509211063385, "learning_rate": 1.9082896794760327e-05, "loss": 0.4966, "step": 1935 }, { "epoch": 0.43185366941780057, "grad_norm": 0.15902282297611237, "learning_rate": 1.908191200543295e-05, "loss": 0.5343, "step": 1936 }, { "epoch": 0.43207673432968996, "grad_norm": 0.16570866107940674, "learning_rate": 1.908092671309216e-05, "loss": 0.4797, "step": 1937 }, { "epoch": 0.4322997992415793, "grad_norm": 0.15843339264392853, "learning_rate": 1.9079940917792524e-05, "loss": 0.5359, "step": 1938 }, { "epoch": 0.43252286415346863, "grad_norm": 0.16115322709083557, "learning_rate": 1.9078954619588645e-05, "loss": 0.5032, "step": 1939 }, { "epoch": 0.432745929065358, "grad_norm": 0.16374099254608154, "learning_rate": 1.9077967818535153e-05, "loss": 0.4998, "step": 1940 }, { "epoch": 0.43296899397724736, "grad_norm": 0.17378082871437073, "learning_rate": 1.9076980514686695e-05, "loss": 0.5403, "step": 1941 }, { "epoch": 0.43319205888913676, "grad_norm": 0.16688857972621918, "learning_rate": 1.9075992708097965e-05, "loss": 0.5103, "step": 1942 }, { "epoch": 0.4334151238010261, "grad_norm": 0.16233094036579132, "learning_rate": 1.9075004398823665e-05, "loss": 0.5045, "step": 1943 }, { "epoch": 0.4336381887129155, "grad_norm": 0.16693229973316193, "learning_rate": 1.907401558691854e-05, "loss": 0.5167, "step": 1944 }, { "epoch": 0.4338612536248048, "grad_norm": 0.16341574490070343, "learning_rate": 1.9073026272437353e-05, "loss": 0.5001, "step": 1945 }, { "epoch": 0.43408431853669416, "grad_norm": 0.16043853759765625, "learning_rate": 1.90720364554349e-05, "loss": 0.5109, "step": 1946 }, { "epoch": 0.43430738344858355, "grad_norm": 0.15996608138084412, "learning_rate": 1.9071046135966e-05, "loss": 0.4826, "step": 1947 }, { "epoch": 0.4345304483604729, "grad_norm": 0.15902268886566162, "learning_rate": 1.9070055314085508e-05, "loss": 0.4989, "step": 1948 }, { "epoch": 0.4347535132723623, "grad_norm": 0.16526652872562408, "learning_rate": 1.9069063989848298e-05, "loss": 0.5209, "step": 1949 }, { "epoch": 0.4349765781842516, "grad_norm": 0.16759642958641052, "learning_rate": 1.9068072163309282e-05, "loss": 0.5246, "step": 1950 }, { "epoch": 0.43519964309614095, "grad_norm": 0.17135794460773468, "learning_rate": 1.9067079834523387e-05, "loss": 0.5198, "step": 1951 }, { "epoch": 0.43542270800803035, "grad_norm": 0.16336077451705933, "learning_rate": 1.9066087003545576e-05, "loss": 0.5098, "step": 1952 }, { "epoch": 0.4356457729199197, "grad_norm": 0.1512858122587204, "learning_rate": 1.9065093670430836e-05, "loss": 0.4843, "step": 1953 }, { "epoch": 0.4358688378318091, "grad_norm": 0.17053045332431793, "learning_rate": 1.9064099835234188e-05, "loss": 0.527, "step": 1954 }, { "epoch": 0.4360919027436984, "grad_norm": 0.16126702725887299, "learning_rate": 1.9063105498010678e-05, "loss": 0.5222, "step": 1955 }, { "epoch": 0.43631496765558775, "grad_norm": 0.1633397489786148, "learning_rate": 1.9062110658815375e-05, "loss": 0.5124, "step": 1956 }, { "epoch": 0.43653803256747714, "grad_norm": 0.18700194358825684, "learning_rate": 1.9061115317703384e-05, "loss": 0.5123, "step": 1957 }, { "epoch": 0.4367610974793665, "grad_norm": 0.1907283067703247, "learning_rate": 1.9060119474729826e-05, "loss": 0.5223, "step": 1958 }, { "epoch": 0.43698416239125587, "grad_norm": 0.1634737253189087, "learning_rate": 1.9059123129949865e-05, "loss": 0.4879, "step": 1959 }, { "epoch": 0.4372072273031452, "grad_norm": 0.18314914405345917, "learning_rate": 1.9058126283418675e-05, "loss": 0.5018, "step": 1960 }, { "epoch": 0.4374302922150346, "grad_norm": 0.1680321991443634, "learning_rate": 1.9057128935191477e-05, "loss": 0.4975, "step": 1961 }, { "epoch": 0.43765335712692394, "grad_norm": 0.1635400801897049, "learning_rate": 1.9056131085323506e-05, "loss": 0.4895, "step": 1962 }, { "epoch": 0.4378764220388133, "grad_norm": 0.15011341869831085, "learning_rate": 1.905513273387003e-05, "loss": 0.4765, "step": 1963 }, { "epoch": 0.43809948695070267, "grad_norm": 0.16991405189037323, "learning_rate": 1.9054133880886348e-05, "loss": 0.518, "step": 1964 }, { "epoch": 0.438322551862592, "grad_norm": 0.17523062229156494, "learning_rate": 1.9053134526427777e-05, "loss": 0.4764, "step": 1965 }, { "epoch": 0.4385456167744814, "grad_norm": 0.1695917248725891, "learning_rate": 1.905213467054967e-05, "loss": 0.5111, "step": 1966 }, { "epoch": 0.43876868168637073, "grad_norm": 0.16954153776168823, "learning_rate": 1.90511343133074e-05, "loss": 0.505, "step": 1967 }, { "epoch": 0.43899174659826007, "grad_norm": 0.3032020628452301, "learning_rate": 1.905013345475638e-05, "loss": 0.5037, "step": 1968 }, { "epoch": 0.43921481151014946, "grad_norm": 0.16867168247699738, "learning_rate": 1.9049132094952046e-05, "loss": 0.5596, "step": 1969 }, { "epoch": 0.4394378764220388, "grad_norm": 0.16780449450016022, "learning_rate": 1.904813023394985e-05, "loss": 0.4937, "step": 1970 }, { "epoch": 0.4396609413339282, "grad_norm": 0.19413304328918457, "learning_rate": 1.904712787180529e-05, "loss": 0.4823, "step": 1971 }, { "epoch": 0.4398840062458175, "grad_norm": 0.16119445860385895, "learning_rate": 1.9046125008573876e-05, "loss": 0.5451, "step": 1972 }, { "epoch": 0.4401070711577069, "grad_norm": 0.1607835441827774, "learning_rate": 1.904512164431116e-05, "loss": 0.5066, "step": 1973 }, { "epoch": 0.44033013606959626, "grad_norm": 0.1697167158126831, "learning_rate": 1.9044117779072708e-05, "loss": 0.5166, "step": 1974 }, { "epoch": 0.4405532009814856, "grad_norm": 0.16189232468605042, "learning_rate": 1.9043113412914128e-05, "loss": 0.4968, "step": 1975 }, { "epoch": 0.440776265893375, "grad_norm": 0.1717143952846527, "learning_rate": 1.904210854589104e-05, "loss": 0.481, "step": 1976 }, { "epoch": 0.4409993308052643, "grad_norm": 0.17401723563671112, "learning_rate": 1.9041103178059107e-05, "loss": 0.526, "step": 1977 }, { "epoch": 0.4412223957171537, "grad_norm": 0.16598494350910187, "learning_rate": 1.9040097309474007e-05, "loss": 0.5195, "step": 1978 }, { "epoch": 0.44144546062904305, "grad_norm": 0.21129997074604034, "learning_rate": 1.9039090940191455e-05, "loss": 0.5096, "step": 1979 }, { "epoch": 0.4416685255409324, "grad_norm": 0.17546051740646362, "learning_rate": 1.9038084070267186e-05, "loss": 0.5313, "step": 1980 }, { "epoch": 0.4418915904528218, "grad_norm": 0.15967412292957306, "learning_rate": 1.9037076699756973e-05, "loss": 0.5183, "step": 1981 }, { "epoch": 0.4421146553647111, "grad_norm": 0.17214882373809814, "learning_rate": 1.9036068828716603e-05, "loss": 0.5091, "step": 1982 }, { "epoch": 0.4423377202766005, "grad_norm": 0.184258371591568, "learning_rate": 1.9035060457201904e-05, "loss": 0.4785, "step": 1983 }, { "epoch": 0.44256078518848985, "grad_norm": 0.1570109874010086, "learning_rate": 1.9034051585268725e-05, "loss": 0.515, "step": 1984 }, { "epoch": 0.4427838501003792, "grad_norm": 0.17051473259925842, "learning_rate": 1.903304221297294e-05, "loss": 0.5031, "step": 1985 }, { "epoch": 0.4430069150122686, "grad_norm": 0.19998317956924438, "learning_rate": 1.903203234037046e-05, "loss": 0.499, "step": 1986 }, { "epoch": 0.4432299799241579, "grad_norm": 0.16017475724220276, "learning_rate": 1.9031021967517213e-05, "loss": 0.5237, "step": 1987 }, { "epoch": 0.4434530448360473, "grad_norm": 0.18142235279083252, "learning_rate": 1.9030011094469164e-05, "loss": 0.5373, "step": 1988 }, { "epoch": 0.44367610974793664, "grad_norm": 0.16957718133926392, "learning_rate": 1.90289997212823e-05, "loss": 0.5038, "step": 1989 }, { "epoch": 0.44389917465982603, "grad_norm": 0.16631929576396942, "learning_rate": 1.9027987848012635e-05, "loss": 0.4873, "step": 1990 }, { "epoch": 0.44412223957171537, "grad_norm": 0.16362012922763824, "learning_rate": 1.9026975474716215e-05, "loss": 0.5204, "step": 1991 }, { "epoch": 0.4443453044836047, "grad_norm": 0.39765313267707825, "learning_rate": 1.902596260144911e-05, "loss": 0.4736, "step": 1992 }, { "epoch": 0.4445683693954941, "grad_norm": 0.17083793878555298, "learning_rate": 1.9024949228267423e-05, "loss": 0.5152, "step": 1993 }, { "epoch": 0.44479143430738344, "grad_norm": 0.1833147555589676, "learning_rate": 1.902393535522728e-05, "loss": 0.536, "step": 1994 }, { "epoch": 0.44501449921927283, "grad_norm": 0.17427003383636475, "learning_rate": 1.902292098238483e-05, "loss": 0.514, "step": 1995 }, { "epoch": 0.44523756413116217, "grad_norm": 0.15868180990219116, "learning_rate": 1.902190610979626e-05, "loss": 0.4869, "step": 1996 }, { "epoch": 0.4454606290430515, "grad_norm": 0.17407076060771942, "learning_rate": 1.9020890737517783e-05, "loss": 0.5134, "step": 1997 }, { "epoch": 0.4456836939549409, "grad_norm": 0.16776099801063538, "learning_rate": 1.901987486560563e-05, "loss": 0.5079, "step": 1998 }, { "epoch": 0.44590675886683023, "grad_norm": 0.2472338080406189, "learning_rate": 1.9018858494116074e-05, "loss": 0.5128, "step": 1999 }, { "epoch": 0.4461298237787196, "grad_norm": 0.16303667426109314, "learning_rate": 1.90178416231054e-05, "loss": 0.5225, "step": 2000 }, { "epoch": 0.44635288869060896, "grad_norm": 0.17127907276153564, "learning_rate": 1.901682425262993e-05, "loss": 0.5092, "step": 2001 }, { "epoch": 0.44657595360249835, "grad_norm": 0.17813526093959808, "learning_rate": 1.9015806382746018e-05, "loss": 0.5, "step": 2002 }, { "epoch": 0.4467990185143877, "grad_norm": 0.1653776615858078, "learning_rate": 1.901478801351004e-05, "loss": 0.5051, "step": 2003 }, { "epoch": 0.44702208342627703, "grad_norm": 0.16295598447322845, "learning_rate": 1.9013769144978392e-05, "loss": 0.4955, "step": 2004 }, { "epoch": 0.4472451483381664, "grad_norm": 0.18259268999099731, "learning_rate": 1.901274977720751e-05, "loss": 0.5504, "step": 2005 }, { "epoch": 0.44746821325005576, "grad_norm": 0.19138063490390778, "learning_rate": 1.9011729910253856e-05, "loss": 0.5129, "step": 2006 }, { "epoch": 0.44769127816194515, "grad_norm": 0.1588236689567566, "learning_rate": 1.9010709544173913e-05, "loss": 0.5078, "step": 2007 }, { "epoch": 0.4479143430738345, "grad_norm": 0.162478968501091, "learning_rate": 1.900968867902419e-05, "loss": 0.5121, "step": 2008 }, { "epoch": 0.4481374079857238, "grad_norm": 0.17752927541732788, "learning_rate": 1.900866731486124e-05, "loss": 0.4983, "step": 2009 }, { "epoch": 0.4483604728976132, "grad_norm": 0.17127980291843414, "learning_rate": 1.900764545174163e-05, "loss": 0.5347, "step": 2010 }, { "epoch": 0.44858353780950255, "grad_norm": 0.1686343103647232, "learning_rate": 1.900662308972195e-05, "loss": 0.5107, "step": 2011 }, { "epoch": 0.44880660272139195, "grad_norm": 0.16008520126342773, "learning_rate": 1.9005600228858832e-05, "loss": 0.5036, "step": 2012 }, { "epoch": 0.4490296676332813, "grad_norm": 0.16494229435920715, "learning_rate": 1.9004576869208922e-05, "loss": 0.5135, "step": 2013 }, { "epoch": 0.4492527325451706, "grad_norm": 0.17318038642406464, "learning_rate": 1.9003553010828906e-05, "loss": 0.5279, "step": 2014 }, { "epoch": 0.44947579745706, "grad_norm": 0.252083420753479, "learning_rate": 1.9002528653775492e-05, "loss": 0.5176, "step": 2015 }, { "epoch": 0.44969886236894935, "grad_norm": 0.1737111508846283, "learning_rate": 1.900150379810541e-05, "loss": 0.5051, "step": 2016 }, { "epoch": 0.44992192728083874, "grad_norm": 0.172331303358078, "learning_rate": 1.9000478443875427e-05, "loss": 0.5161, "step": 2017 }, { "epoch": 0.4501449921927281, "grad_norm": 0.17037436366081238, "learning_rate": 1.899945259114233e-05, "loss": 0.5282, "step": 2018 }, { "epoch": 0.45036805710461747, "grad_norm": 0.16774620115756989, "learning_rate": 1.8998426239962945e-05, "loss": 0.5056, "step": 2019 }, { "epoch": 0.4505911220165068, "grad_norm": 0.17856718599796295, "learning_rate": 1.899739939039411e-05, "loss": 0.514, "step": 2020 }, { "epoch": 0.45081418692839614, "grad_norm": 0.18140466511249542, "learning_rate": 1.89963720424927e-05, "loss": 0.5372, "step": 2021 }, { "epoch": 0.45103725184028554, "grad_norm": 0.1641564816236496, "learning_rate": 1.8995344196315618e-05, "loss": 0.5205, "step": 2022 }, { "epoch": 0.4512603167521749, "grad_norm": 0.16866321861743927, "learning_rate": 1.899431585191979e-05, "loss": 0.4906, "step": 2023 }, { "epoch": 0.45148338166406426, "grad_norm": 0.16418704390525818, "learning_rate": 1.8993287009362175e-05, "loss": 0.5275, "step": 2024 }, { "epoch": 0.4517064465759536, "grad_norm": 0.15996740758419037, "learning_rate": 1.8992257668699756e-05, "loss": 0.5124, "step": 2025 }, { "epoch": 0.45192951148784294, "grad_norm": 0.16520540416240692, "learning_rate": 1.899122782998954e-05, "loss": 0.5045, "step": 2026 }, { "epoch": 0.45215257639973233, "grad_norm": 0.16536462306976318, "learning_rate": 1.8990197493288575e-05, "loss": 0.5003, "step": 2027 }, { "epoch": 0.45237564131162167, "grad_norm": 0.16777758300304413, "learning_rate": 1.8989166658653916e-05, "loss": 0.5443, "step": 2028 }, { "epoch": 0.45259870622351106, "grad_norm": 0.16215604543685913, "learning_rate": 1.8988135326142668e-05, "loss": 0.4965, "step": 2029 }, { "epoch": 0.4528217711354004, "grad_norm": 0.1622023582458496, "learning_rate": 1.8987103495811947e-05, "loss": 0.4606, "step": 2030 }, { "epoch": 0.45304483604728973, "grad_norm": 0.16461379826068878, "learning_rate": 1.8986071167718902e-05, "loss": 0.5273, "step": 2031 }, { "epoch": 0.4532679009591791, "grad_norm": 0.16976390779018402, "learning_rate": 1.8985038341920715e-05, "loss": 0.4889, "step": 2032 }, { "epoch": 0.45349096587106846, "grad_norm": 0.1569637656211853, "learning_rate": 1.898400501847458e-05, "loss": 0.4828, "step": 2033 }, { "epoch": 0.45371403078295786, "grad_norm": 0.16640910506248474, "learning_rate": 1.898297119743774e-05, "loss": 0.5351, "step": 2034 }, { "epoch": 0.4539370956948472, "grad_norm": 0.1644599735736847, "learning_rate": 1.898193687886745e-05, "loss": 0.4869, "step": 2035 }, { "epoch": 0.4541601606067366, "grad_norm": 0.16732025146484375, "learning_rate": 1.8980902062820997e-05, "loss": 0.4893, "step": 2036 }, { "epoch": 0.4543832255186259, "grad_norm": 0.16019105911254883, "learning_rate": 1.8979866749355694e-05, "loss": 0.5038, "step": 2037 }, { "epoch": 0.45460629043051526, "grad_norm": 0.17239058017730713, "learning_rate": 1.8978830938528884e-05, "loss": 0.5106, "step": 2038 }, { "epoch": 0.45482935534240465, "grad_norm": 0.15983308851718903, "learning_rate": 1.8977794630397942e-05, "loss": 0.5163, "step": 2039 }, { "epoch": 0.455052420254294, "grad_norm": 0.16848912835121155, "learning_rate": 1.8976757825020255e-05, "loss": 0.5462, "step": 2040 }, { "epoch": 0.4552754851661834, "grad_norm": 0.15303179621696472, "learning_rate": 1.897572052245326e-05, "loss": 0.4594, "step": 2041 }, { "epoch": 0.4554985500780727, "grad_norm": 0.17675776779651642, "learning_rate": 1.8974682722754397e-05, "loss": 0.5385, "step": 2042 }, { "epoch": 0.45572161498996205, "grad_norm": 0.17349328100681305, "learning_rate": 1.8973644425981154e-05, "loss": 0.4953, "step": 2043 }, { "epoch": 0.45594467990185145, "grad_norm": 0.15753738582134247, "learning_rate": 1.897260563219104e-05, "loss": 0.4851, "step": 2044 }, { "epoch": 0.4561677448137408, "grad_norm": 0.16278420388698578, "learning_rate": 1.897156634144158e-05, "loss": 0.4994, "step": 2045 }, { "epoch": 0.4563908097256302, "grad_norm": 0.1592012643814087, "learning_rate": 1.8970526553790346e-05, "loss": 0.513, "step": 2046 }, { "epoch": 0.4566138746375195, "grad_norm": 0.18133483827114105, "learning_rate": 1.8969486269294922e-05, "loss": 0.4734, "step": 2047 }, { "epoch": 0.4568369395494089, "grad_norm": 0.170905202627182, "learning_rate": 1.8968445488012933e-05, "loss": 0.5401, "step": 2048 }, { "epoch": 0.45706000446129824, "grad_norm": 0.16522808372974396, "learning_rate": 1.8967404210002014e-05, "loss": 0.5008, "step": 2049 }, { "epoch": 0.4572830693731876, "grad_norm": 0.18945086002349854, "learning_rate": 1.8966362435319845e-05, "loss": 0.5064, "step": 2050 }, { "epoch": 0.45750613428507697, "grad_norm": 0.17545610666275024, "learning_rate": 1.8965320164024123e-05, "loss": 0.5105, "step": 2051 }, { "epoch": 0.4577291991969663, "grad_norm": 0.1666799783706665, "learning_rate": 1.8964277396172577e-05, "loss": 0.5233, "step": 2052 }, { "epoch": 0.4579522641088557, "grad_norm": 0.16869474947452545, "learning_rate": 1.896323413182296e-05, "loss": 0.4921, "step": 2053 }, { "epoch": 0.45817532902074504, "grad_norm": 0.17108272016048431, "learning_rate": 1.8962190371033057e-05, "loss": 0.5358, "step": 2054 }, { "epoch": 0.4583983939326344, "grad_norm": 0.16527149081230164, "learning_rate": 1.8961146113860676e-05, "loss": 0.5033, "step": 2055 }, { "epoch": 0.45862145884452377, "grad_norm": 0.17335473001003265, "learning_rate": 1.8960101360363656e-05, "loss": 0.5156, "step": 2056 }, { "epoch": 0.4588445237564131, "grad_norm": 0.17064844071865082, "learning_rate": 1.895905611059986e-05, "loss": 0.5118, "step": 2057 }, { "epoch": 0.4590675886683025, "grad_norm": 0.16565892100334167, "learning_rate": 1.8958010364627183e-05, "loss": 0.5152, "step": 2058 }, { "epoch": 0.45929065358019183, "grad_norm": 0.1769896149635315, "learning_rate": 1.8956964122503546e-05, "loss": 0.5483, "step": 2059 }, { "epoch": 0.45951371849208117, "grad_norm": 0.1574466973543167, "learning_rate": 1.895591738428689e-05, "loss": 0.5205, "step": 2060 }, { "epoch": 0.45973678340397056, "grad_norm": 0.16593651473522186, "learning_rate": 1.8954870150035195e-05, "loss": 0.4863, "step": 2061 }, { "epoch": 0.4599598483158599, "grad_norm": 0.16003793478012085, "learning_rate": 1.8953822419806468e-05, "loss": 0.5051, "step": 2062 }, { "epoch": 0.4601829132277493, "grad_norm": 0.17091605067253113, "learning_rate": 1.895277419365873e-05, "loss": 0.5037, "step": 2063 }, { "epoch": 0.4604059781396386, "grad_norm": 0.17469583451747894, "learning_rate": 1.895172547165004e-05, "loss": 0.4878, "step": 2064 }, { "epoch": 0.460629043051528, "grad_norm": 0.16121211647987366, "learning_rate": 1.895067625383849e-05, "loss": 0.4768, "step": 2065 }, { "epoch": 0.46085210796341736, "grad_norm": 0.1591392159461975, "learning_rate": 1.894962654028218e-05, "loss": 0.5138, "step": 2066 }, { "epoch": 0.4610751728753067, "grad_norm": 1.2021454572677612, "learning_rate": 1.8948576331039264e-05, "loss": 0.5438, "step": 2067 }, { "epoch": 0.4612982377871961, "grad_norm": 0.1606753021478653, "learning_rate": 1.8947525626167896e-05, "loss": 0.5376, "step": 2068 }, { "epoch": 0.4615213026990854, "grad_norm": 0.17104879021644592, "learning_rate": 1.894647442572628e-05, "loss": 0.5164, "step": 2069 }, { "epoch": 0.4617443676109748, "grad_norm": 0.16374671459197998, "learning_rate": 1.8945422729772633e-05, "loss": 0.516, "step": 2070 }, { "epoch": 0.46196743252286415, "grad_norm": 0.16128143668174744, "learning_rate": 1.8944370538365206e-05, "loss": 0.5157, "step": 2071 }, { "epoch": 0.4621904974347535, "grad_norm": 0.16720320284366608, "learning_rate": 1.8943317851562278e-05, "loss": 0.5291, "step": 2072 }, { "epoch": 0.4624135623466429, "grad_norm": 0.1879906952381134, "learning_rate": 1.8942264669422154e-05, "loss": 0.5145, "step": 2073 }, { "epoch": 0.4626366272585322, "grad_norm": 0.17528733611106873, "learning_rate": 1.894121099200316e-05, "loss": 0.5235, "step": 2074 }, { "epoch": 0.4628596921704216, "grad_norm": 0.16449770331382751, "learning_rate": 1.894015681936366e-05, "loss": 0.5165, "step": 2075 }, { "epoch": 0.46308275708231095, "grad_norm": 0.16646511852741241, "learning_rate": 1.8939102151562036e-05, "loss": 0.5093, "step": 2076 }, { "epoch": 0.46330582199420034, "grad_norm": 0.17490622401237488, "learning_rate": 1.893804698865671e-05, "loss": 0.5385, "step": 2077 }, { "epoch": 0.4635288869060897, "grad_norm": 0.2023608237504959, "learning_rate": 1.893699133070612e-05, "loss": 0.4972, "step": 2078 }, { "epoch": 0.463751951817979, "grad_norm": 0.16244065761566162, "learning_rate": 1.893593517776873e-05, "loss": 0.5269, "step": 2079 }, { "epoch": 0.4639750167298684, "grad_norm": 0.16335059702396393, "learning_rate": 1.8934878529903043e-05, "loss": 0.5107, "step": 2080 }, { "epoch": 0.46419808164175774, "grad_norm": 0.16115811467170715, "learning_rate": 1.8933821387167582e-05, "loss": 0.5214, "step": 2081 }, { "epoch": 0.46442114655364714, "grad_norm": 0.1833171248435974, "learning_rate": 1.8932763749620894e-05, "loss": 0.4664, "step": 2082 }, { "epoch": 0.46464421146553647, "grad_norm": 0.1721174716949463, "learning_rate": 1.893170561732156e-05, "loss": 0.5224, "step": 2083 }, { "epoch": 0.4648672763774258, "grad_norm": 0.16398632526397705, "learning_rate": 1.8930646990328188e-05, "loss": 0.4968, "step": 2084 }, { "epoch": 0.4650903412893152, "grad_norm": 0.17112642526626587, "learning_rate": 1.892958786869941e-05, "loss": 0.5241, "step": 2085 }, { "epoch": 0.46531340620120454, "grad_norm": 0.18161650002002716, "learning_rate": 1.8928528252493884e-05, "loss": 0.5047, "step": 2086 }, { "epoch": 0.46553647111309393, "grad_norm": 0.1646048128604889, "learning_rate": 1.8927468141770304e-05, "loss": 0.5271, "step": 2087 }, { "epoch": 0.46575953602498327, "grad_norm": 0.16542619466781616, "learning_rate": 1.8926407536587378e-05, "loss": 0.5338, "step": 2088 }, { "epoch": 0.4659826009368726, "grad_norm": 0.17052631080150604, "learning_rate": 1.8925346437003856e-05, "loss": 0.5276, "step": 2089 }, { "epoch": 0.466205665848762, "grad_norm": 0.17085763812065125, "learning_rate": 1.8924284843078503e-05, "loss": 0.5277, "step": 2090 }, { "epoch": 0.46642873076065133, "grad_norm": 0.1689939796924591, "learning_rate": 1.8923222754870124e-05, "loss": 0.4889, "step": 2091 }, { "epoch": 0.4666517956725407, "grad_norm": 0.16189821064472198, "learning_rate": 1.8922160172437535e-05, "loss": 0.5009, "step": 2092 }, { "epoch": 0.46687486058443006, "grad_norm": 0.16525843739509583, "learning_rate": 1.89210970958396e-05, "loss": 0.5073, "step": 2093 }, { "epoch": 0.46709792549631945, "grad_norm": 0.159906804561615, "learning_rate": 1.8920033525135184e-05, "loss": 0.5107, "step": 2094 }, { "epoch": 0.4673209904082088, "grad_norm": 0.15631826221942902, "learning_rate": 1.8918969460383205e-05, "loss": 0.4839, "step": 2095 }, { "epoch": 0.46754405532009813, "grad_norm": 0.1734917014837265, "learning_rate": 1.8917904901642593e-05, "loss": 0.5121, "step": 2096 }, { "epoch": 0.4677671202319875, "grad_norm": 0.17475415766239166, "learning_rate": 1.8916839848972315e-05, "loss": 0.5198, "step": 2097 }, { "epoch": 0.46799018514387686, "grad_norm": 0.15618795156478882, "learning_rate": 1.8915774302431357e-05, "loss": 0.4733, "step": 2098 }, { "epoch": 0.46821325005576625, "grad_norm": 0.17231950163841248, "learning_rate": 1.8914708262078735e-05, "loss": 0.4975, "step": 2099 }, { "epoch": 0.4684363149676556, "grad_norm": 0.1630786657333374, "learning_rate": 1.891364172797349e-05, "loss": 0.4906, "step": 2100 }, { "epoch": 0.4686593798795449, "grad_norm": 0.20263253152370453, "learning_rate": 1.89125747001747e-05, "loss": 0.4961, "step": 2101 }, { "epoch": 0.4688824447914343, "grad_norm": 0.1711629182100296, "learning_rate": 1.891150717874146e-05, "loss": 0.5197, "step": 2102 }, { "epoch": 0.46910550970332365, "grad_norm": 0.15791350603103638, "learning_rate": 1.89104391637329e-05, "loss": 0.5151, "step": 2103 }, { "epoch": 0.46932857461521305, "grad_norm": 0.1648074984550476, "learning_rate": 1.890937065520817e-05, "loss": 0.5206, "step": 2104 }, { "epoch": 0.4695516395271024, "grad_norm": 0.16917914152145386, "learning_rate": 1.8908301653226448e-05, "loss": 0.5308, "step": 2105 }, { "epoch": 0.4697747044389917, "grad_norm": 0.16751410067081451, "learning_rate": 1.8907232157846946e-05, "loss": 0.512, "step": 2106 }, { "epoch": 0.4699977693508811, "grad_norm": 0.16895310580730438, "learning_rate": 1.89061621691289e-05, "loss": 0.5134, "step": 2107 }, { "epoch": 0.47022083426277045, "grad_norm": 0.1659642457962036, "learning_rate": 1.8905091687131567e-05, "loss": 0.5125, "step": 2108 }, { "epoch": 0.47044389917465984, "grad_norm": 0.18306776881217957, "learning_rate": 1.8904020711914243e-05, "loss": 0.5404, "step": 2109 }, { "epoch": 0.4706669640865492, "grad_norm": 0.15722155570983887, "learning_rate": 1.8902949243536245e-05, "loss": 0.4929, "step": 2110 }, { "epoch": 0.47089002899843857, "grad_norm": 0.17024749517440796, "learning_rate": 1.8901877282056916e-05, "loss": 0.5129, "step": 2111 }, { "epoch": 0.4711130939103279, "grad_norm": 0.1668287068605423, "learning_rate": 1.8900804827535626e-05, "loss": 0.5386, "step": 2112 }, { "epoch": 0.47133615882221724, "grad_norm": 0.17657139897346497, "learning_rate": 1.8899731880031778e-05, "loss": 0.4682, "step": 2113 }, { "epoch": 0.47155922373410664, "grad_norm": 0.17689798772335052, "learning_rate": 1.8898658439604798e-05, "loss": 0.4814, "step": 2114 }, { "epoch": 0.471782288645996, "grad_norm": 0.16755974292755127, "learning_rate": 1.8897584506314137e-05, "loss": 0.4956, "step": 2115 }, { "epoch": 0.47200535355788537, "grad_norm": 0.16352280974388123, "learning_rate": 1.8896510080219277e-05, "loss": 0.5034, "step": 2116 }, { "epoch": 0.4722284184697747, "grad_norm": 0.16078925132751465, "learning_rate": 1.889543516137973e-05, "loss": 0.5017, "step": 2117 }, { "epoch": 0.47245148338166404, "grad_norm": 0.17038793861865997, "learning_rate": 1.8894359749855027e-05, "loss": 0.5128, "step": 2118 }, { "epoch": 0.47267454829355343, "grad_norm": 0.16477574408054352, "learning_rate": 1.8893283845704733e-05, "loss": 0.4765, "step": 2119 }, { "epoch": 0.47289761320544277, "grad_norm": 0.18243131041526794, "learning_rate": 1.889220744898844e-05, "loss": 0.5266, "step": 2120 }, { "epoch": 0.47312067811733216, "grad_norm": 0.15715399384498596, "learning_rate": 1.8891130559765763e-05, "loss": 0.5092, "step": 2121 }, { "epoch": 0.4733437430292215, "grad_norm": 0.16996978223323822, "learning_rate": 1.8890053178096353e-05, "loss": 0.5166, "step": 2122 }, { "epoch": 0.4735668079411109, "grad_norm": 0.16575470566749573, "learning_rate": 1.888897530403987e-05, "loss": 0.5212, "step": 2123 }, { "epoch": 0.4737898728530002, "grad_norm": 0.1607031673192978, "learning_rate": 1.8887896937656028e-05, "loss": 0.5067, "step": 2124 }, { "epoch": 0.47401293776488956, "grad_norm": 0.157185897231102, "learning_rate": 1.8886818079004545e-05, "loss": 0.4801, "step": 2125 }, { "epoch": 0.47423600267677896, "grad_norm": 0.1661345213651657, "learning_rate": 1.8885738728145173e-05, "loss": 0.5212, "step": 2126 }, { "epoch": 0.4744590675886683, "grad_norm": 0.17212405800819397, "learning_rate": 1.8884658885137698e-05, "loss": 0.4872, "step": 2127 }, { "epoch": 0.4746821325005577, "grad_norm": 0.1681511402130127, "learning_rate": 1.8883578550041925e-05, "loss": 0.5266, "step": 2128 }, { "epoch": 0.474905197412447, "grad_norm": 0.15793408453464508, "learning_rate": 1.8882497722917697e-05, "loss": 0.505, "step": 2129 }, { "epoch": 0.47512826232433636, "grad_norm": 0.1688978672027588, "learning_rate": 1.8881416403824867e-05, "loss": 0.5005, "step": 2130 }, { "epoch": 0.47535132723622575, "grad_norm": 0.17982251942157745, "learning_rate": 1.8880334592823333e-05, "loss": 0.5177, "step": 2131 }, { "epoch": 0.4755743921481151, "grad_norm": 0.15668730437755585, "learning_rate": 1.8879252289973008e-05, "loss": 0.5076, "step": 2132 }, { "epoch": 0.4757974570600045, "grad_norm": 0.15427713096141815, "learning_rate": 1.8878169495333843e-05, "loss": 0.4916, "step": 2133 }, { "epoch": 0.4760205219718938, "grad_norm": 0.1573878973722458, "learning_rate": 1.88770862089658e-05, "loss": 0.4851, "step": 2134 }, { "epoch": 0.47624358688378315, "grad_norm": 0.16141542792320251, "learning_rate": 1.887600243092889e-05, "loss": 0.5065, "step": 2135 }, { "epoch": 0.47646665179567255, "grad_norm": 0.1874970644712448, "learning_rate": 1.8874918161283127e-05, "loss": 0.4987, "step": 2136 }, { "epoch": 0.4766897167075619, "grad_norm": 0.1556268036365509, "learning_rate": 1.887383340008857e-05, "loss": 0.5001, "step": 2137 }, { "epoch": 0.4769127816194513, "grad_norm": 0.16177254915237427, "learning_rate": 1.8872748147405303e-05, "loss": 0.5145, "step": 2138 }, { "epoch": 0.4771358465313406, "grad_norm": 0.16272678971290588, "learning_rate": 1.8871662403293434e-05, "loss": 0.5111, "step": 2139 }, { "epoch": 0.47735891144323, "grad_norm": 0.157213494181633, "learning_rate": 1.8870576167813096e-05, "loss": 0.4975, "step": 2140 }, { "epoch": 0.47758197635511934, "grad_norm": 0.16583600640296936, "learning_rate": 1.886948944102445e-05, "loss": 0.4835, "step": 2141 }, { "epoch": 0.4778050412670087, "grad_norm": 0.15528440475463867, "learning_rate": 1.8868402222987687e-05, "loss": 0.4821, "step": 2142 }, { "epoch": 0.47802810617889807, "grad_norm": 0.1678980439901352, "learning_rate": 1.8867314513763023e-05, "loss": 0.5126, "step": 2143 }, { "epoch": 0.4782511710907874, "grad_norm": 0.1719445437192917, "learning_rate": 1.886622631341071e-05, "loss": 0.5278, "step": 2144 }, { "epoch": 0.4784742360026768, "grad_norm": 0.16177958250045776, "learning_rate": 1.886513762199101e-05, "loss": 0.4716, "step": 2145 }, { "epoch": 0.47869730091456614, "grad_norm": 0.17055903375148773, "learning_rate": 1.886404843956422e-05, "loss": 0.5155, "step": 2146 }, { "epoch": 0.4789203658264555, "grad_norm": 0.16013142466545105, "learning_rate": 1.8862958766190673e-05, "loss": 0.4882, "step": 2147 }, { "epoch": 0.47914343073834487, "grad_norm": 0.15844453871250153, "learning_rate": 1.886186860193072e-05, "loss": 0.5125, "step": 2148 }, { "epoch": 0.4793664956502342, "grad_norm": 0.16520345211029053, "learning_rate": 1.886077794684474e-05, "loss": 0.5355, "step": 2149 }, { "epoch": 0.4795895605621236, "grad_norm": 0.16560572385787964, "learning_rate": 1.885968680099314e-05, "loss": 0.5011, "step": 2150 }, { "epoch": 0.47981262547401293, "grad_norm": 0.16460593044757843, "learning_rate": 1.885859516443636e-05, "loss": 0.5186, "step": 2151 }, { "epoch": 0.4800356903859023, "grad_norm": 0.16416500508785248, "learning_rate": 1.885750303723485e-05, "loss": 0.5121, "step": 2152 }, { "epoch": 0.48025875529779166, "grad_norm": 0.1615086942911148, "learning_rate": 1.8856410419449108e-05, "loss": 0.5291, "step": 2153 }, { "epoch": 0.480481820209681, "grad_norm": 0.16628290712833405, "learning_rate": 1.885531731113965e-05, "loss": 0.5166, "step": 2154 }, { "epoch": 0.4807048851215704, "grad_norm": 0.212614044547081, "learning_rate": 1.8854223712367017e-05, "loss": 0.4687, "step": 2155 }, { "epoch": 0.48092795003345973, "grad_norm": 0.16417112946510315, "learning_rate": 1.8853129623191775e-05, "loss": 0.4839, "step": 2156 }, { "epoch": 0.4811510149453491, "grad_norm": 0.16616222262382507, "learning_rate": 1.8852035043674534e-05, "loss": 0.5013, "step": 2157 }, { "epoch": 0.48137407985723846, "grad_norm": 0.16397857666015625, "learning_rate": 1.8850939973875907e-05, "loss": 0.5163, "step": 2158 }, { "epoch": 0.4815971447691278, "grad_norm": 0.1688777208328247, "learning_rate": 1.8849844413856548e-05, "loss": 0.5154, "step": 2159 }, { "epoch": 0.4818202096810172, "grad_norm": 0.16248802840709686, "learning_rate": 1.884874836367714e-05, "loss": 0.49, "step": 2160 }, { "epoch": 0.4820432745929065, "grad_norm": 0.17419035732746124, "learning_rate": 1.8847651823398385e-05, "loss": 0.5105, "step": 2161 }, { "epoch": 0.4822663395047959, "grad_norm": 0.16489480435848236, "learning_rate": 1.884655479308102e-05, "loss": 0.4967, "step": 2162 }, { "epoch": 0.48248940441668525, "grad_norm": 0.16695787012577057, "learning_rate": 1.8845457272785802e-05, "loss": 0.5316, "step": 2163 }, { "epoch": 0.4827124693285746, "grad_norm": 0.1630323827266693, "learning_rate": 1.884435926257352e-05, "loss": 0.5144, "step": 2164 }, { "epoch": 0.482935534240464, "grad_norm": 0.16861893236637115, "learning_rate": 1.8843260762504985e-05, "loss": 0.5402, "step": 2165 }, { "epoch": 0.4831585991523533, "grad_norm": 0.16794002056121826, "learning_rate": 1.884216177264105e-05, "loss": 0.5581, "step": 2166 }, { "epoch": 0.4833816640642427, "grad_norm": 0.16783180832862854, "learning_rate": 1.8841062293042572e-05, "loss": 0.4884, "step": 2167 }, { "epoch": 0.48360472897613205, "grad_norm": 0.16696153581142426, "learning_rate": 1.8839962323770455e-05, "loss": 0.4893, "step": 2168 }, { "epoch": 0.48382779388802144, "grad_norm": 0.17243477702140808, "learning_rate": 1.8838861864885617e-05, "loss": 0.5134, "step": 2169 }, { "epoch": 0.4840508587999108, "grad_norm": 0.16844888031482697, "learning_rate": 1.883776091644901e-05, "loss": 0.5203, "step": 2170 }, { "epoch": 0.4842739237118001, "grad_norm": 0.16011419892311096, "learning_rate": 1.8836659478521614e-05, "loss": 0.4837, "step": 2171 }, { "epoch": 0.4844969886236895, "grad_norm": 0.16117213666439056, "learning_rate": 1.883555755116443e-05, "loss": 0.5124, "step": 2172 }, { "epoch": 0.48472005353557884, "grad_norm": 0.15577594935894012, "learning_rate": 1.883445513443849e-05, "loss": 0.4939, "step": 2173 }, { "epoch": 0.48494311844746824, "grad_norm": 0.17128697037696838, "learning_rate": 1.883335222840485e-05, "loss": 0.5331, "step": 2174 }, { "epoch": 0.4851661833593576, "grad_norm": 0.16078589856624603, "learning_rate": 1.8832248833124606e-05, "loss": 0.5348, "step": 2175 }, { "epoch": 0.4853892482712469, "grad_norm": 0.17158381640911102, "learning_rate": 1.8831144948658863e-05, "loss": 0.5016, "step": 2176 }, { "epoch": 0.4856123131831363, "grad_norm": 0.17395475506782532, "learning_rate": 1.883004057506876e-05, "loss": 0.5145, "step": 2177 }, { "epoch": 0.48583537809502564, "grad_norm": 0.15187759697437286, "learning_rate": 1.882893571241547e-05, "loss": 0.4747, "step": 2178 }, { "epoch": 0.48605844300691503, "grad_norm": 0.1744384616613388, "learning_rate": 1.8827830360760184e-05, "loss": 0.488, "step": 2179 }, { "epoch": 0.48628150791880437, "grad_norm": 0.16714079678058624, "learning_rate": 1.8826724520164118e-05, "loss": 0.5117, "step": 2180 }, { "epoch": 0.4865045728306937, "grad_norm": 0.1541585475206375, "learning_rate": 1.8825618190688534e-05, "loss": 0.4808, "step": 2181 }, { "epoch": 0.4867276377425831, "grad_norm": 0.16642670333385468, "learning_rate": 1.8824511372394694e-05, "loss": 0.5223, "step": 2182 }, { "epoch": 0.48695070265447243, "grad_norm": 0.16418053209781647, "learning_rate": 1.8823404065343904e-05, "loss": 0.5022, "step": 2183 }, { "epoch": 0.4871737675663618, "grad_norm": 0.1558452993631363, "learning_rate": 1.88222962695975e-05, "loss": 0.5009, "step": 2184 }, { "epoch": 0.48739683247825116, "grad_norm": 0.16112567484378815, "learning_rate": 1.8821187985216835e-05, "loss": 0.4851, "step": 2185 }, { "epoch": 0.48761989739014056, "grad_norm": 0.15951795876026154, "learning_rate": 1.8820079212263287e-05, "loss": 0.5364, "step": 2186 }, { "epoch": 0.4878429623020299, "grad_norm": 0.15660803020000458, "learning_rate": 1.8818969950798274e-05, "loss": 0.4687, "step": 2187 }, { "epoch": 0.48806602721391923, "grad_norm": 0.16717955470085144, "learning_rate": 1.881786020088323e-05, "loss": 0.5184, "step": 2188 }, { "epoch": 0.4882890921258086, "grad_norm": 0.4413386583328247, "learning_rate": 1.8816749962579625e-05, "loss": 0.49, "step": 2189 }, { "epoch": 0.48851215703769796, "grad_norm": 0.18018092215061188, "learning_rate": 1.8815639235948945e-05, "loss": 0.4948, "step": 2190 }, { "epoch": 0.48873522194958735, "grad_norm": 0.17363835871219635, "learning_rate": 1.881452802105271e-05, "loss": 0.5255, "step": 2191 }, { "epoch": 0.4889582868614767, "grad_norm": 0.16417300701141357, "learning_rate": 1.8813416317952474e-05, "loss": 0.4984, "step": 2192 }, { "epoch": 0.489181351773366, "grad_norm": 0.1698407083749771, "learning_rate": 1.8812304126709797e-05, "loss": 0.5155, "step": 2193 }, { "epoch": 0.4894044166852554, "grad_norm": 0.16547353565692902, "learning_rate": 1.881119144738629e-05, "loss": 0.504, "step": 2194 }, { "epoch": 0.48962748159714475, "grad_norm": 0.15916848182678223, "learning_rate": 1.8810078280043574e-05, "loss": 0.5158, "step": 2195 }, { "epoch": 0.48985054650903415, "grad_norm": 0.16655400395393372, "learning_rate": 1.8808964624743303e-05, "loss": 0.521, "step": 2196 }, { "epoch": 0.4900736114209235, "grad_norm": 0.16791512072086334, "learning_rate": 1.8807850481547165e-05, "loss": 0.5327, "step": 2197 }, { "epoch": 0.4902966763328129, "grad_norm": 0.1612214297056198, "learning_rate": 1.880673585051686e-05, "loss": 0.5074, "step": 2198 }, { "epoch": 0.4905197412447022, "grad_norm": 0.33009010553359985, "learning_rate": 1.880562073171413e-05, "loss": 0.4991, "step": 2199 }, { "epoch": 0.49074280615659155, "grad_norm": 0.17632457613945007, "learning_rate": 1.8804505125200732e-05, "loss": 0.5094, "step": 2200 }, { "epoch": 0.49096587106848094, "grad_norm": 0.17380337417125702, "learning_rate": 1.8803389031038462e-05, "loss": 0.5132, "step": 2201 }, { "epoch": 0.4911889359803703, "grad_norm": 0.16013850271701813, "learning_rate": 1.880227244928913e-05, "loss": 0.4987, "step": 2202 }, { "epoch": 0.49141200089225967, "grad_norm": 0.18471701443195343, "learning_rate": 1.8801155380014578e-05, "loss": 0.5203, "step": 2203 }, { "epoch": 0.491635065804149, "grad_norm": 0.16928328573703766, "learning_rate": 1.8800037823276683e-05, "loss": 0.4868, "step": 2204 }, { "epoch": 0.49185813071603834, "grad_norm": 0.1749187409877777, "learning_rate": 1.8798919779137337e-05, "loss": 0.5049, "step": 2205 }, { "epoch": 0.49208119562792774, "grad_norm": 0.173324853181839, "learning_rate": 1.8797801247658465e-05, "loss": 0.4963, "step": 2206 }, { "epoch": 0.4923042605398171, "grad_norm": 0.16650435328483582, "learning_rate": 1.8796682228902024e-05, "loss": 0.5232, "step": 2207 }, { "epoch": 0.49252732545170647, "grad_norm": 0.16675227880477905, "learning_rate": 1.8795562722929986e-05, "loss": 0.4887, "step": 2208 }, { "epoch": 0.4927503903635958, "grad_norm": 0.20501257479190826, "learning_rate": 1.8794442729804356e-05, "loss": 0.5045, "step": 2209 }, { "epoch": 0.49297345527548514, "grad_norm": 0.15837359428405762, "learning_rate": 1.879332224958717e-05, "loss": 0.4704, "step": 2210 }, { "epoch": 0.49319652018737453, "grad_norm": 0.15580782294273376, "learning_rate": 1.8792201282340485e-05, "loss": 0.4753, "step": 2211 }, { "epoch": 0.49341958509926387, "grad_norm": 0.16155748069286346, "learning_rate": 1.879107982812639e-05, "loss": 0.5178, "step": 2212 }, { "epoch": 0.49364265001115326, "grad_norm": 0.17078126966953278, "learning_rate": 1.8789957887006994e-05, "loss": 0.5208, "step": 2213 }, { "epoch": 0.4938657149230426, "grad_norm": 0.16327013075351715, "learning_rate": 1.8788835459044438e-05, "loss": 0.5446, "step": 2214 }, { "epoch": 0.494088779834932, "grad_norm": 0.1604888141155243, "learning_rate": 1.878771254430089e-05, "loss": 0.5142, "step": 2215 }, { "epoch": 0.4943118447468213, "grad_norm": 0.16108398139476776, "learning_rate": 1.8786589142838548e-05, "loss": 0.517, "step": 2216 }, { "epoch": 0.49453490965871066, "grad_norm": 0.15508361160755157, "learning_rate": 1.8785465254719625e-05, "loss": 0.4782, "step": 2217 }, { "epoch": 0.49475797457060006, "grad_norm": 0.1612912118434906, "learning_rate": 1.878434088000638e-05, "loss": 0.5104, "step": 2218 }, { "epoch": 0.4949810394824894, "grad_norm": 0.1641056090593338, "learning_rate": 1.8783216018761075e-05, "loss": 0.5227, "step": 2219 }, { "epoch": 0.4952041043943788, "grad_norm": 0.16892564296722412, "learning_rate": 1.878209067104602e-05, "loss": 0.5142, "step": 2220 }, { "epoch": 0.4954271693062681, "grad_norm": 0.18777111172676086, "learning_rate": 1.8780964836923545e-05, "loss": 0.5091, "step": 2221 }, { "epoch": 0.49565023421815746, "grad_norm": 0.15524406731128693, "learning_rate": 1.8779838516455998e-05, "loss": 0.5155, "step": 2222 }, { "epoch": 0.49587329913004685, "grad_norm": 0.1688852459192276, "learning_rate": 1.877871170970577e-05, "loss": 0.5266, "step": 2223 }, { "epoch": 0.4960963640419362, "grad_norm": 0.2029954195022583, "learning_rate": 1.8777584416735268e-05, "loss": 0.4751, "step": 2224 }, { "epoch": 0.4963194289538256, "grad_norm": 0.168426051735878, "learning_rate": 1.8776456637606926e-05, "loss": 0.5263, "step": 2225 }, { "epoch": 0.4965424938657149, "grad_norm": 0.18985538184642792, "learning_rate": 1.877532837238321e-05, "loss": 0.528, "step": 2226 }, { "epoch": 0.4967655587776043, "grad_norm": 0.15908297896385193, "learning_rate": 1.8774199621126605e-05, "loss": 0.5226, "step": 2227 }, { "epoch": 0.49698862368949365, "grad_norm": 0.15700525045394897, "learning_rate": 1.8773070383899638e-05, "loss": 0.4857, "step": 2228 }, { "epoch": 0.497211688601383, "grad_norm": 0.17015953361988068, "learning_rate": 1.877194066076485e-05, "loss": 0.488, "step": 2229 }, { "epoch": 0.4974347535132724, "grad_norm": 0.21846015751361847, "learning_rate": 1.8770810451784806e-05, "loss": 0.5305, "step": 2230 }, { "epoch": 0.4976578184251617, "grad_norm": 0.16297823190689087, "learning_rate": 1.8769679757022114e-05, "loss": 0.494, "step": 2231 }, { "epoch": 0.4978808833370511, "grad_norm": 0.1758735030889511, "learning_rate": 1.876854857653939e-05, "loss": 0.5394, "step": 2232 }, { "epoch": 0.49810394824894044, "grad_norm": 0.1673547923564911, "learning_rate": 1.876741691039929e-05, "loss": 0.5035, "step": 2233 }, { "epoch": 0.4983270131608298, "grad_norm": 0.16478639841079712, "learning_rate": 1.8766284758664487e-05, "loss": 0.5, "step": 2234 }, { "epoch": 0.49855007807271917, "grad_norm": 0.16910696029663086, "learning_rate": 1.8765152121397697e-05, "loss": 0.5087, "step": 2235 }, { "epoch": 0.4987731429846085, "grad_norm": 0.16947272419929504, "learning_rate": 1.876401899866165e-05, "loss": 0.5064, "step": 2236 }, { "epoch": 0.4989962078964979, "grad_norm": 0.16172346472740173, "learning_rate": 1.87628853905191e-05, "loss": 0.4901, "step": 2237 }, { "epoch": 0.49921927280838724, "grad_norm": 0.16061736643314362, "learning_rate": 1.8761751297032838e-05, "loss": 0.5038, "step": 2238 }, { "epoch": 0.4994423377202766, "grad_norm": 0.15990641713142395, "learning_rate": 1.8760616718265676e-05, "loss": 0.4854, "step": 2239 }, { "epoch": 0.49966540263216597, "grad_norm": 0.16735920310020447, "learning_rate": 1.875948165428045e-05, "loss": 0.5027, "step": 2240 }, { "epoch": 0.4998884675440553, "grad_norm": 0.16122539341449738, "learning_rate": 1.8758346105140033e-05, "loss": 0.4922, "step": 2241 }, { "epoch": 0.5001115324559446, "grad_norm": 0.1718963235616684, "learning_rate": 1.8757210070907315e-05, "loss": 0.5138, "step": 2242 }, { "epoch": 0.500334597367834, "grad_norm": 0.17143313586711884, "learning_rate": 1.875607355164522e-05, "loss": 0.5308, "step": 2243 }, { "epoch": 0.5005576622797234, "grad_norm": 0.15493686497211456, "learning_rate": 1.875493654741669e-05, "loss": 0.5243, "step": 2244 }, { "epoch": 0.5007807271916127, "grad_norm": 0.162288635969162, "learning_rate": 1.8753799058284707e-05, "loss": 0.5052, "step": 2245 }, { "epoch": 0.5010037921035021, "grad_norm": 0.17301948368549347, "learning_rate": 1.8752661084312268e-05, "loss": 0.5119, "step": 2246 }, { "epoch": 0.5012268570153915, "grad_norm": 0.16729313135147095, "learning_rate": 1.8751522625562405e-05, "loss": 0.5372, "step": 2247 }, { "epoch": 0.5014499219272809, "grad_norm": 0.16277892887592316, "learning_rate": 1.8750383682098166e-05, "loss": 0.4847, "step": 2248 }, { "epoch": 0.5016729868391702, "grad_norm": 0.16257484257221222, "learning_rate": 1.8749244253982633e-05, "loss": 0.5105, "step": 2249 }, { "epoch": 0.5018960517510596, "grad_norm": 0.16304615139961243, "learning_rate": 1.8748104341278924e-05, "loss": 0.5073, "step": 2250 }, { "epoch": 0.502119116662949, "grad_norm": 0.1586243063211441, "learning_rate": 1.874696394405017e-05, "loss": 0.5096, "step": 2251 }, { "epoch": 0.5023421815748382, "grad_norm": 0.16425400972366333, "learning_rate": 1.874582306235953e-05, "loss": 0.4817, "step": 2252 }, { "epoch": 0.5025652464867276, "grad_norm": 0.16808679699897766, "learning_rate": 1.874468169627019e-05, "loss": 0.4866, "step": 2253 }, { "epoch": 0.502788311398617, "grad_norm": 0.1656448394060135, "learning_rate": 1.8743539845845378e-05, "loss": 0.4903, "step": 2254 }, { "epoch": 0.5030113763105064, "grad_norm": 0.16811887919902802, "learning_rate": 1.8742397511148328e-05, "loss": 0.4954, "step": 2255 }, { "epoch": 0.5032344412223957, "grad_norm": 0.17424499988555908, "learning_rate": 1.8741254692242315e-05, "loss": 0.5301, "step": 2256 }, { "epoch": 0.5034575061342851, "grad_norm": 0.17697711288928986, "learning_rate": 1.874011138919063e-05, "loss": 0.5103, "step": 2257 }, { "epoch": 0.5036805710461745, "grad_norm": 0.16454057395458221, "learning_rate": 1.8738967602056597e-05, "loss": 0.499, "step": 2258 }, { "epoch": 0.5039036359580638, "grad_norm": 0.16936853528022766, "learning_rate": 1.873782333090357e-05, "loss": 0.5029, "step": 2259 }, { "epoch": 0.5041267008699531, "grad_norm": 0.16321606934070587, "learning_rate": 1.873667857579492e-05, "loss": 0.5043, "step": 2260 }, { "epoch": 0.5043497657818425, "grad_norm": 0.17789475619792938, "learning_rate": 1.873553333679406e-05, "loss": 0.5206, "step": 2261 }, { "epoch": 0.5045728306937318, "grad_norm": 0.17053046822547913, "learning_rate": 1.8734387613964414e-05, "loss": 0.5241, "step": 2262 }, { "epoch": 0.5047958956056212, "grad_norm": 0.16408482193946838, "learning_rate": 1.8733241407369438e-05, "loss": 0.4999, "step": 2263 }, { "epoch": 0.5050189605175106, "grad_norm": 0.1780211627483368, "learning_rate": 1.873209471707262e-05, "loss": 0.5014, "step": 2264 }, { "epoch": 0.5052420254294, "grad_norm": 0.17059922218322754, "learning_rate": 1.873094754313747e-05, "loss": 0.5315, "step": 2265 }, { "epoch": 0.5054650903412893, "grad_norm": 0.17030328512191772, "learning_rate": 1.8729799885627528e-05, "loss": 0.5295, "step": 2266 }, { "epoch": 0.5056881552531787, "grad_norm": 0.1972958743572235, "learning_rate": 1.872865174460635e-05, "loss": 0.5094, "step": 2267 }, { "epoch": 0.5059112201650681, "grad_norm": 0.1663295030593872, "learning_rate": 1.8727503120137537e-05, "loss": 0.5023, "step": 2268 }, { "epoch": 0.5061342850769573, "grad_norm": 0.1621520221233368, "learning_rate": 1.87263540122847e-05, "loss": 0.5141, "step": 2269 }, { "epoch": 0.5063573499888467, "grad_norm": 0.35805073380470276, "learning_rate": 1.872520442111149e-05, "loss": 0.539, "step": 2270 }, { "epoch": 0.5065804149007361, "grad_norm": 0.16250286996364594, "learning_rate": 1.8724054346681573e-05, "loss": 0.5173, "step": 2271 }, { "epoch": 0.5068034798126255, "grad_norm": 0.16895325481891632, "learning_rate": 1.872290378905865e-05, "loss": 0.5041, "step": 2272 }, { "epoch": 0.5070265447245148, "grad_norm": 0.1690344661474228, "learning_rate": 1.872175274830645e-05, "loss": 0.5337, "step": 2273 }, { "epoch": 0.5072496096364042, "grad_norm": 0.15611621737480164, "learning_rate": 1.8720601224488716e-05, "loss": 0.4819, "step": 2274 }, { "epoch": 0.5074726745482936, "grad_norm": 0.18844662606716156, "learning_rate": 1.871944921766923e-05, "loss": 0.5082, "step": 2275 }, { "epoch": 0.5076957394601829, "grad_norm": 0.16826370358467102, "learning_rate": 1.8718296727911803e-05, "loss": 0.4914, "step": 2276 }, { "epoch": 0.5079188043720723, "grad_norm": 0.15561175346374512, "learning_rate": 1.871714375528026e-05, "loss": 0.5147, "step": 2277 }, { "epoch": 0.5081418692839617, "grad_norm": 0.1746017336845398, "learning_rate": 1.8715990299838463e-05, "loss": 0.5224, "step": 2278 }, { "epoch": 0.508364934195851, "grad_norm": 0.1721627563238144, "learning_rate": 1.8714836361650303e-05, "loss": 0.509, "step": 2279 }, { "epoch": 0.5085879991077403, "grad_norm": 0.15273146331310272, "learning_rate": 1.871368194077968e-05, "loss": 0.4933, "step": 2280 }, { "epoch": 0.5088110640196297, "grad_norm": 0.17017610371112823, "learning_rate": 1.8712527037290546e-05, "loss": 0.518, "step": 2281 }, { "epoch": 0.5090341289315191, "grad_norm": 0.16407892107963562, "learning_rate": 1.8711371651246854e-05, "loss": 0.5152, "step": 2282 }, { "epoch": 0.5092571938434084, "grad_norm": 0.18274806439876556, "learning_rate": 1.8710215782712606e-05, "loss": 0.4959, "step": 2283 }, { "epoch": 0.5094802587552978, "grad_norm": 0.17501875758171082, "learning_rate": 1.870905943175182e-05, "loss": 0.5372, "step": 2284 }, { "epoch": 0.5097033236671872, "grad_norm": 0.1640838235616684, "learning_rate": 1.870790259842854e-05, "loss": 0.5102, "step": 2285 }, { "epoch": 0.5099263885790765, "grad_norm": 0.15836408734321594, "learning_rate": 1.870674528280684e-05, "loss": 0.5194, "step": 2286 }, { "epoch": 0.5101494534909659, "grad_norm": 0.16688112914562225, "learning_rate": 1.8705587484950815e-05, "loss": 0.5077, "step": 2287 }, { "epoch": 0.5103725184028552, "grad_norm": 0.16494499146938324, "learning_rate": 1.8704429204924598e-05, "loss": 0.4954, "step": 2288 }, { "epoch": 0.5105955833147446, "grad_norm": 0.1580001264810562, "learning_rate": 1.8703270442792337e-05, "loss": 0.507, "step": 2289 }, { "epoch": 0.5108186482266339, "grad_norm": 0.17867796123027802, "learning_rate": 1.8702111198618213e-05, "loss": 0.5081, "step": 2290 }, { "epoch": 0.5110417131385233, "grad_norm": 0.28131747245788574, "learning_rate": 1.8700951472466435e-05, "loss": 0.5188, "step": 2291 }, { "epoch": 0.5112647780504127, "grad_norm": 0.1570468544960022, "learning_rate": 1.869979126440123e-05, "loss": 0.4992, "step": 2292 }, { "epoch": 0.511487842962302, "grad_norm": 0.1686072200536728, "learning_rate": 1.8698630574486862e-05, "loss": 0.4951, "step": 2293 }, { "epoch": 0.5117109078741914, "grad_norm": 0.1702122986316681, "learning_rate": 1.869746940278762e-05, "loss": 0.4965, "step": 2294 }, { "epoch": 0.5119339727860808, "grad_norm": 0.1632847636938095, "learning_rate": 1.8696307749367807e-05, "loss": 0.48, "step": 2295 }, { "epoch": 0.5121570376979702, "grad_norm": 0.20230402052402496, "learning_rate": 1.8695145614291773e-05, "loss": 0.5262, "step": 2296 }, { "epoch": 0.5123801026098594, "grad_norm": 0.17471125721931458, "learning_rate": 1.8693982997623877e-05, "loss": 0.5207, "step": 2297 }, { "epoch": 0.5126031675217488, "grad_norm": 0.17141500115394592, "learning_rate": 1.869281989942852e-05, "loss": 0.4995, "step": 2298 }, { "epoch": 0.5128262324336382, "grad_norm": 0.1641875058412552, "learning_rate": 1.8691656319770112e-05, "loss": 0.4773, "step": 2299 }, { "epoch": 0.5130492973455275, "grad_norm": 0.1655300408601761, "learning_rate": 1.8690492258713107e-05, "loss": 0.5171, "step": 2300 }, { "epoch": 0.5132723622574169, "grad_norm": 0.15635208785533905, "learning_rate": 1.8689327716321975e-05, "loss": 0.5005, "step": 2301 }, { "epoch": 0.5134954271693063, "grad_norm": 0.1540510356426239, "learning_rate": 1.8688162692661214e-05, "loss": 0.4925, "step": 2302 }, { "epoch": 0.5137184920811956, "grad_norm": 0.16273614764213562, "learning_rate": 1.8686997187795354e-05, "loss": 0.5153, "step": 2303 }, { "epoch": 0.513941556993085, "grad_norm": 0.16257858276367188, "learning_rate": 1.8685831201788945e-05, "loss": 0.495, "step": 2304 }, { "epoch": 0.5141646219049744, "grad_norm": 0.1771703064441681, "learning_rate": 1.8684664734706572e-05, "loss": 0.5499, "step": 2305 }, { "epoch": 0.5143876868168638, "grad_norm": 0.17417144775390625, "learning_rate": 1.8683497786612834e-05, "loss": 0.4777, "step": 2306 }, { "epoch": 0.514610751728753, "grad_norm": 0.2703874707221985, "learning_rate": 1.8682330357572368e-05, "loss": 0.5032, "step": 2307 }, { "epoch": 0.5148338166406424, "grad_norm": 0.16738004982471466, "learning_rate": 1.8681162447649834e-05, "loss": 0.514, "step": 2308 }, { "epoch": 0.5150568815525318, "grad_norm": 0.17614148557186127, "learning_rate": 1.8679994056909915e-05, "loss": 0.5193, "step": 2309 }, { "epoch": 0.5152799464644211, "grad_norm": 0.1655927300453186, "learning_rate": 1.8678825185417328e-05, "loss": 0.5253, "step": 2310 }, { "epoch": 0.5155030113763105, "grad_norm": 0.16509908437728882, "learning_rate": 1.867765583323681e-05, "loss": 0.5108, "step": 2311 }, { "epoch": 0.5157260762881999, "grad_norm": 0.17742401361465454, "learning_rate": 1.8676486000433123e-05, "loss": 0.5024, "step": 2312 }, { "epoch": 0.5159491412000893, "grad_norm": 0.1574956476688385, "learning_rate": 1.8675315687071068e-05, "loss": 0.4944, "step": 2313 }, { "epoch": 0.5161722061119786, "grad_norm": 0.16360540688037872, "learning_rate": 1.867414489321546e-05, "loss": 0.5054, "step": 2314 }, { "epoch": 0.516395271023868, "grad_norm": 0.1697058230638504, "learning_rate": 1.8672973618931144e-05, "loss": 0.5198, "step": 2315 }, { "epoch": 0.5166183359357573, "grad_norm": 0.16391722857952118, "learning_rate": 1.8671801864282996e-05, "loss": 0.5138, "step": 2316 }, { "epoch": 0.5168414008476466, "grad_norm": 0.1696222871541977, "learning_rate": 1.867062962933591e-05, "loss": 0.4929, "step": 2317 }, { "epoch": 0.517064465759536, "grad_norm": 0.16633786261081696, "learning_rate": 1.8669456914154817e-05, "loss": 0.4723, "step": 2318 }, { "epoch": 0.5172875306714254, "grad_norm": 0.16019189357757568, "learning_rate": 1.8668283718804664e-05, "loss": 0.5093, "step": 2319 }, { "epoch": 0.5175105955833147, "grad_norm": 0.1600850522518158, "learning_rate": 1.8667110043350435e-05, "loss": 0.4998, "step": 2320 }, { "epoch": 0.5177336604952041, "grad_norm": 0.1658647060394287, "learning_rate": 1.8665935887857136e-05, "loss": 0.5201, "step": 2321 }, { "epoch": 0.5179567254070935, "grad_norm": 0.16224202513694763, "learning_rate": 1.8664761252389795e-05, "loss": 0.4979, "step": 2322 }, { "epoch": 0.5181797903189829, "grad_norm": 0.16525104641914368, "learning_rate": 1.866358613701347e-05, "loss": 0.5168, "step": 2323 }, { "epoch": 0.5184028552308722, "grad_norm": 0.16819415986537933, "learning_rate": 1.866241054179325e-05, "loss": 0.5189, "step": 2324 }, { "epoch": 0.5186259201427615, "grad_norm": 0.1534615010023117, "learning_rate": 1.8661234466794246e-05, "loss": 0.4935, "step": 2325 }, { "epoch": 0.5188489850546509, "grad_norm": 0.15495775640010834, "learning_rate": 1.8660057912081598e-05, "loss": 0.5039, "step": 2326 }, { "epoch": 0.5190720499665402, "grad_norm": 0.16145430505275726, "learning_rate": 1.8658880877720467e-05, "loss": 0.4758, "step": 2327 }, { "epoch": 0.5192951148784296, "grad_norm": 0.16488440334796906, "learning_rate": 1.8657703363776044e-05, "loss": 0.5183, "step": 2328 }, { "epoch": 0.519518179790319, "grad_norm": 0.16661033034324646, "learning_rate": 1.8656525370313553e-05, "loss": 0.519, "step": 2329 }, { "epoch": 0.5197412447022084, "grad_norm": 0.15961341559886932, "learning_rate": 1.8655346897398234e-05, "loss": 0.478, "step": 2330 }, { "epoch": 0.5199643096140977, "grad_norm": 0.15573342144489288, "learning_rate": 1.865416794509536e-05, "loss": 0.4843, "step": 2331 }, { "epoch": 0.5201873745259871, "grad_norm": 0.16535112261772156, "learning_rate": 1.8652988513470227e-05, "loss": 0.4943, "step": 2332 }, { "epoch": 0.5204104394378765, "grad_norm": 0.16046664118766785, "learning_rate": 1.865180860258816e-05, "loss": 0.5445, "step": 2333 }, { "epoch": 0.5206335043497657, "grad_norm": 0.16381436586380005, "learning_rate": 1.8650628212514516e-05, "loss": 0.4956, "step": 2334 }, { "epoch": 0.5208565692616551, "grad_norm": 0.17565996944904327, "learning_rate": 1.864944734331466e-05, "loss": 0.5186, "step": 2335 }, { "epoch": 0.5210796341735445, "grad_norm": 0.15597450733184814, "learning_rate": 1.8648265995054005e-05, "loss": 0.4993, "step": 2336 }, { "epoch": 0.5213026990854338, "grad_norm": 0.22045518457889557, "learning_rate": 1.8647084167797982e-05, "loss": 0.4938, "step": 2337 }, { "epoch": 0.5215257639973232, "grad_norm": 0.14948628842830658, "learning_rate": 1.8645901861612044e-05, "loss": 0.4637, "step": 2338 }, { "epoch": 0.5217488289092126, "grad_norm": 0.16708412766456604, "learning_rate": 1.8644719076561675e-05, "loss": 0.5093, "step": 2339 }, { "epoch": 0.521971893821102, "grad_norm": 0.1535186767578125, "learning_rate": 1.8643535812712386e-05, "loss": 0.4894, "step": 2340 }, { "epoch": 0.5221949587329913, "grad_norm": 0.19612246751785278, "learning_rate": 1.8642352070129715e-05, "loss": 0.4719, "step": 2341 }, { "epoch": 0.5224180236448807, "grad_norm": 0.1691182553768158, "learning_rate": 1.8641167848879225e-05, "loss": 0.5119, "step": 2342 }, { "epoch": 0.52264108855677, "grad_norm": 0.16633757948875427, "learning_rate": 1.86399831490265e-05, "loss": 0.4941, "step": 2343 }, { "epoch": 0.5228641534686593, "grad_norm": 0.15868477523326874, "learning_rate": 1.8638797970637162e-05, "loss": 0.5114, "step": 2344 }, { "epoch": 0.5230872183805487, "grad_norm": 0.1649598479270935, "learning_rate": 1.8637612313776856e-05, "loss": 0.5446, "step": 2345 }, { "epoch": 0.5233102832924381, "grad_norm": 0.1706569939851761, "learning_rate": 1.8636426178511246e-05, "loss": 0.509, "step": 2346 }, { "epoch": 0.5235333482043275, "grad_norm": 0.16140028834342957, "learning_rate": 1.8635239564906026e-05, "loss": 0.4963, "step": 2347 }, { "epoch": 0.5237564131162168, "grad_norm": 0.16061624884605408, "learning_rate": 1.8634052473026925e-05, "loss": 0.5149, "step": 2348 }, { "epoch": 0.5239794780281062, "grad_norm": 0.1699933260679245, "learning_rate": 1.8632864902939684e-05, "loss": 0.5436, "step": 2349 }, { "epoch": 0.5242025429399956, "grad_norm": 0.16812016069889069, "learning_rate": 1.8631676854710082e-05, "loss": 0.5179, "step": 2350 }, { "epoch": 0.5244256078518849, "grad_norm": 0.1655721366405487, "learning_rate": 1.8630488328403924e-05, "loss": 0.5069, "step": 2351 }, { "epoch": 0.5246486727637742, "grad_norm": 0.28273990750312805, "learning_rate": 1.8629299324087032e-05, "loss": 0.5207, "step": 2352 }, { "epoch": 0.5248717376756636, "grad_norm": 0.167250394821167, "learning_rate": 1.8628109841825263e-05, "loss": 0.4995, "step": 2353 }, { "epoch": 0.525094802587553, "grad_norm": 0.15988002717494965, "learning_rate": 1.8626919881684497e-05, "loss": 0.5331, "step": 2354 }, { "epoch": 0.5253178674994423, "grad_norm": 0.18735967576503754, "learning_rate": 1.8625729443730643e-05, "loss": 0.508, "step": 2355 }, { "epoch": 0.5255409324113317, "grad_norm": 0.16121627390384674, "learning_rate": 1.8624538528029638e-05, "loss": 0.4926, "step": 2356 }, { "epoch": 0.5257639973232211, "grad_norm": 0.16314919292926788, "learning_rate": 1.8623347134647437e-05, "loss": 0.4936, "step": 2357 }, { "epoch": 0.5259870622351104, "grad_norm": 0.16220073401927948, "learning_rate": 1.862215526365003e-05, "loss": 0.4866, "step": 2358 }, { "epoch": 0.5262101271469998, "grad_norm": 0.18198350071907043, "learning_rate": 1.8620962915103425e-05, "loss": 0.4969, "step": 2359 }, { "epoch": 0.5264331920588892, "grad_norm": 0.16916455328464508, "learning_rate": 1.8619770089073665e-05, "loss": 0.4872, "step": 2360 }, { "epoch": 0.5266562569707784, "grad_norm": 0.19399681687355042, "learning_rate": 1.861857678562682e-05, "loss": 0.5037, "step": 2361 }, { "epoch": 0.5268793218826678, "grad_norm": 0.16876184940338135, "learning_rate": 1.8617383004828978e-05, "loss": 0.5265, "step": 2362 }, { "epoch": 0.5271023867945572, "grad_norm": 0.17060886323451996, "learning_rate": 1.8616188746746262e-05, "loss": 0.5247, "step": 2363 }, { "epoch": 0.5273254517064466, "grad_norm": 0.17065522074699402, "learning_rate": 1.8614994011444812e-05, "loss": 0.5258, "step": 2364 }, { "epoch": 0.5275485166183359, "grad_norm": 0.41077789664268494, "learning_rate": 1.8613798798990806e-05, "loss": 0.4971, "step": 2365 }, { "epoch": 0.5277715815302253, "grad_norm": 0.1597769409418106, "learning_rate": 1.8612603109450437e-05, "loss": 0.4971, "step": 2366 }, { "epoch": 0.5279946464421147, "grad_norm": 0.15872445702552795, "learning_rate": 1.8611406942889934e-05, "loss": 0.4987, "step": 2367 }, { "epoch": 0.528217711354004, "grad_norm": 0.1601647585630417, "learning_rate": 1.861021029937555e-05, "loss": 0.4791, "step": 2368 }, { "epoch": 0.5284407762658934, "grad_norm": 0.1533002108335495, "learning_rate": 1.8609013178973555e-05, "loss": 0.4842, "step": 2369 }, { "epoch": 0.5286638411777828, "grad_norm": 0.16213175654411316, "learning_rate": 1.8607815581750257e-05, "loss": 0.5063, "step": 2370 }, { "epoch": 0.5288869060896721, "grad_norm": 0.1895373910665512, "learning_rate": 1.860661750777199e-05, "loss": 0.4885, "step": 2371 }, { "epoch": 0.5291099710015614, "grad_norm": 0.1578717827796936, "learning_rate": 1.8605418957105105e-05, "loss": 0.4541, "step": 2372 }, { "epoch": 0.5293330359134508, "grad_norm": 0.17269465327262878, "learning_rate": 1.8604219929815987e-05, "loss": 0.5234, "step": 2373 }, { "epoch": 0.5295561008253402, "grad_norm": 0.16912835836410522, "learning_rate": 1.860302042597105e-05, "loss": 0.5295, "step": 2374 }, { "epoch": 0.5297791657372295, "grad_norm": 0.16457562148571014, "learning_rate": 1.8601820445636722e-05, "loss": 0.5096, "step": 2375 }, { "epoch": 0.5300022306491189, "grad_norm": 0.17353565990924835, "learning_rate": 1.860061998887947e-05, "loss": 0.5086, "step": 2376 }, { "epoch": 0.5302252955610083, "grad_norm": 0.1719389110803604, "learning_rate": 1.859941905576579e-05, "loss": 0.5045, "step": 2377 }, { "epoch": 0.5304483604728976, "grad_norm": 0.15863987803459167, "learning_rate": 1.8598217646362183e-05, "loss": 0.5007, "step": 2378 }, { "epoch": 0.530671425384787, "grad_norm": 0.16614408791065216, "learning_rate": 1.85970157607352e-05, "loss": 0.5123, "step": 2379 }, { "epoch": 0.5308944902966763, "grad_norm": 0.1657867580652237, "learning_rate": 1.859581339895141e-05, "loss": 0.5237, "step": 2380 }, { "epoch": 0.5311175552085657, "grad_norm": 0.1731244921684265, "learning_rate": 1.85946105610774e-05, "loss": 0.5407, "step": 2381 }, { "epoch": 0.531340620120455, "grad_norm": 0.15216992795467377, "learning_rate": 1.85934072471798e-05, "loss": 0.4709, "step": 2382 }, { "epoch": 0.5315636850323444, "grad_norm": 0.1583755612373352, "learning_rate": 1.8592203457325248e-05, "loss": 0.491, "step": 2383 }, { "epoch": 0.5317867499442338, "grad_norm": 0.16302180290222168, "learning_rate": 1.859099919158042e-05, "loss": 0.4984, "step": 2384 }, { "epoch": 0.5320098148561231, "grad_norm": 0.3964548110961914, "learning_rate": 1.858979445001202e-05, "loss": 0.5208, "step": 2385 }, { "epoch": 0.5322328797680125, "grad_norm": 0.1690683513879776, "learning_rate": 1.8588589232686768e-05, "loss": 0.4932, "step": 2386 }, { "epoch": 0.5324559446799019, "grad_norm": 0.18451258540153503, "learning_rate": 1.8587383539671424e-05, "loss": 0.5261, "step": 2387 }, { "epoch": 0.5326790095917913, "grad_norm": 0.18741649389266968, "learning_rate": 1.858617737103276e-05, "loss": 0.499, "step": 2388 }, { "epoch": 0.5329020745036805, "grad_norm": 0.16601239144802094, "learning_rate": 1.8584970726837587e-05, "loss": 0.5176, "step": 2389 }, { "epoch": 0.5331251394155699, "grad_norm": 0.16434918344020844, "learning_rate": 1.858376360715273e-05, "loss": 0.497, "step": 2390 }, { "epoch": 0.5333482043274593, "grad_norm": 0.16978149116039276, "learning_rate": 1.8582556012045053e-05, "loss": 0.4946, "step": 2391 }, { "epoch": 0.5335712692393486, "grad_norm": 0.17810384929180145, "learning_rate": 1.8581347941581438e-05, "loss": 0.5223, "step": 2392 }, { "epoch": 0.533794334151238, "grad_norm": 0.1722012311220169, "learning_rate": 1.8580139395828795e-05, "loss": 0.5082, "step": 2393 }, { "epoch": 0.5340173990631274, "grad_norm": 0.16442382335662842, "learning_rate": 1.857893037485406e-05, "loss": 0.4838, "step": 2394 }, { "epoch": 0.5342404639750167, "grad_norm": 0.17343981564044952, "learning_rate": 1.8577720878724195e-05, "loss": 0.5024, "step": 2395 }, { "epoch": 0.5344635288869061, "grad_norm": 0.1579180806875229, "learning_rate": 1.8576510907506192e-05, "loss": 0.5168, "step": 2396 }, { "epoch": 0.5346865937987955, "grad_norm": 0.1650589406490326, "learning_rate": 1.8575300461267073e-05, "loss": 0.5063, "step": 2397 }, { "epoch": 0.5349096587106849, "grad_norm": 0.17452824115753174, "learning_rate": 1.8574089540073868e-05, "loss": 0.5272, "step": 2398 }, { "epoch": 0.5351327236225741, "grad_norm": 0.16344332695007324, "learning_rate": 1.8572878143993652e-05, "loss": 0.4876, "step": 2399 }, { "epoch": 0.5353557885344635, "grad_norm": 0.16399069130420685, "learning_rate": 1.857166627309352e-05, "loss": 0.4929, "step": 2400 }, { "epoch": 0.5355788534463529, "grad_norm": 0.1590677946805954, "learning_rate": 1.857045392744059e-05, "loss": 0.4672, "step": 2401 }, { "epoch": 0.5358019183582422, "grad_norm": 0.15974955260753632, "learning_rate": 1.8569241107102014e-05, "loss": 0.504, "step": 2402 }, { "epoch": 0.5360249832701316, "grad_norm": 0.15898270905017853, "learning_rate": 1.856802781214496e-05, "loss": 0.5031, "step": 2403 }, { "epoch": 0.536248048182021, "grad_norm": 0.15949919819831848, "learning_rate": 1.856681404263663e-05, "loss": 0.4957, "step": 2404 }, { "epoch": 0.5364711130939104, "grad_norm": 0.1708550751209259, "learning_rate": 1.8565599798644253e-05, "loss": 0.5325, "step": 2405 }, { "epoch": 0.5366941780057997, "grad_norm": 0.1631123423576355, "learning_rate": 1.856438508023508e-05, "loss": 0.5044, "step": 2406 }, { "epoch": 0.536917242917689, "grad_norm": 0.17329680919647217, "learning_rate": 1.8563169887476386e-05, "loss": 0.4905, "step": 2407 }, { "epoch": 0.5371403078295784, "grad_norm": 0.1555010825395584, "learning_rate": 1.8561954220435483e-05, "loss": 0.4896, "step": 2408 }, { "epoch": 0.5373633727414677, "grad_norm": 0.1617012768983841, "learning_rate": 1.85607380791797e-05, "loss": 0.5199, "step": 2409 }, { "epoch": 0.5375864376533571, "grad_norm": 0.16019612550735474, "learning_rate": 1.8559521463776388e-05, "loss": 0.5331, "step": 2410 }, { "epoch": 0.5378095025652465, "grad_norm": 0.15812602639198303, "learning_rate": 1.855830437429294e-05, "loss": 0.4946, "step": 2411 }, { "epoch": 0.5380325674771358, "grad_norm": 0.16577614843845367, "learning_rate": 1.8557086810796756e-05, "loss": 0.5049, "step": 2412 }, { "epoch": 0.5382556323890252, "grad_norm": 3.4729576110839844, "learning_rate": 1.8555868773355283e-05, "loss": 0.5572, "step": 2413 }, { "epoch": 0.5384786973009146, "grad_norm": 0.177010640501976, "learning_rate": 1.8554650262035975e-05, "loss": 0.4871, "step": 2414 }, { "epoch": 0.538701762212804, "grad_norm": 0.16966521739959717, "learning_rate": 1.8553431276906328e-05, "loss": 0.5043, "step": 2415 }, { "epoch": 0.5389248271246933, "grad_norm": 0.16035056114196777, "learning_rate": 1.855221181803385e-05, "loss": 0.4951, "step": 2416 }, { "epoch": 0.5391478920365826, "grad_norm": 0.1640123724937439, "learning_rate": 1.8550991885486093e-05, "loss": 0.5175, "step": 2417 }, { "epoch": 0.539370956948472, "grad_norm": 0.16451038420200348, "learning_rate": 1.8549771479330612e-05, "loss": 0.5192, "step": 2418 }, { "epoch": 0.5395940218603613, "grad_norm": 0.16037864983081818, "learning_rate": 1.8548550599635007e-05, "loss": 0.5174, "step": 2419 }, { "epoch": 0.5398170867722507, "grad_norm": 0.1533394753932953, "learning_rate": 1.85473292464669e-05, "loss": 0.502, "step": 2420 }, { "epoch": 0.5400401516841401, "grad_norm": 0.16923432052135468, "learning_rate": 1.854610741989393e-05, "loss": 0.4917, "step": 2421 }, { "epoch": 0.5402632165960295, "grad_norm": 0.16241200268268585, "learning_rate": 1.8544885119983774e-05, "loss": 0.4922, "step": 2422 }, { "epoch": 0.5404862815079188, "grad_norm": 0.1572006493806839, "learning_rate": 1.8543662346804138e-05, "loss": 0.4771, "step": 2423 }, { "epoch": 0.5407093464198082, "grad_norm": 0.18193307518959045, "learning_rate": 1.8542439100422733e-05, "loss": 0.512, "step": 2424 }, { "epoch": 0.5409324113316976, "grad_norm": 0.1868167519569397, "learning_rate": 1.8541215380907317e-05, "loss": 0.5112, "step": 2425 }, { "epoch": 0.5411554762435868, "grad_norm": 0.1707518994808197, "learning_rate": 1.8539991188325664e-05, "loss": 0.4923, "step": 2426 }, { "epoch": 0.5413785411554762, "grad_norm": 0.16489239037036896, "learning_rate": 1.8538766522745587e-05, "loss": 0.4787, "step": 2427 }, { "epoch": 0.5416016060673656, "grad_norm": 0.16489636898040771, "learning_rate": 1.8537541384234906e-05, "loss": 0.4966, "step": 2428 }, { "epoch": 0.541824670979255, "grad_norm": 0.17048819363117218, "learning_rate": 1.853631577286148e-05, "loss": 0.5079, "step": 2429 }, { "epoch": 0.5420477358911443, "grad_norm": 0.16793492436408997, "learning_rate": 1.853508968869319e-05, "loss": 0.5122, "step": 2430 }, { "epoch": 0.5422708008030337, "grad_norm": 0.16388513147830963, "learning_rate": 1.8533863131797948e-05, "loss": 0.4653, "step": 2431 }, { "epoch": 0.5424938657149231, "grad_norm": 0.16700832545757294, "learning_rate": 1.853263610224368e-05, "loss": 0.5208, "step": 2432 }, { "epoch": 0.5427169306268124, "grad_norm": 0.17072373628616333, "learning_rate": 1.8531408600098356e-05, "loss": 0.5032, "step": 2433 }, { "epoch": 0.5429399955387018, "grad_norm": 0.16046811640262604, "learning_rate": 1.8530180625429958e-05, "loss": 0.515, "step": 2434 }, { "epoch": 0.5431630604505912, "grad_norm": 0.16855685412883759, "learning_rate": 1.8528952178306504e-05, "loss": 0.4915, "step": 2435 }, { "epoch": 0.5433861253624804, "grad_norm": 0.17628896236419678, "learning_rate": 1.8527723258796025e-05, "loss": 0.5221, "step": 2436 }, { "epoch": 0.5436091902743698, "grad_norm": 0.1593201607465744, "learning_rate": 1.852649386696659e-05, "loss": 0.5027, "step": 2437 }, { "epoch": 0.5438322551862592, "grad_norm": 0.18933804333209991, "learning_rate": 1.852526400288629e-05, "loss": 0.5187, "step": 2438 }, { "epoch": 0.5440553200981486, "grad_norm": 0.16613252460956573, "learning_rate": 1.852403366662325e-05, "loss": 0.4939, "step": 2439 }, { "epoch": 0.5442783850100379, "grad_norm": 0.16360655426979065, "learning_rate": 1.85228028582456e-05, "loss": 0.4835, "step": 2440 }, { "epoch": 0.5445014499219273, "grad_norm": 0.1624102145433426, "learning_rate": 1.8521571577821522e-05, "loss": 0.5308, "step": 2441 }, { "epoch": 0.5447245148338167, "grad_norm": 0.16573922336101532, "learning_rate": 1.8520339825419204e-05, "loss": 0.5148, "step": 2442 }, { "epoch": 0.544947579745706, "grad_norm": 0.17061501741409302, "learning_rate": 1.8519107601106875e-05, "loss": 0.5025, "step": 2443 }, { "epoch": 0.5451706446575953, "grad_norm": 0.16382652521133423, "learning_rate": 1.851787490495278e-05, "loss": 0.5214, "step": 2444 }, { "epoch": 0.5453937095694847, "grad_norm": 0.16731955111026764, "learning_rate": 1.8516641737025187e-05, "loss": 0.4915, "step": 2445 }, { "epoch": 0.5456167744813741, "grad_norm": 0.1665150374174118, "learning_rate": 1.8515408097392408e-05, "loss": 0.4585, "step": 2446 }, { "epoch": 0.5458398393932634, "grad_norm": 0.16348014771938324, "learning_rate": 1.851417398612276e-05, "loss": 0.5042, "step": 2447 }, { "epoch": 0.5460629043051528, "grad_norm": 0.1641550064086914, "learning_rate": 1.85129394032846e-05, "loss": 0.5103, "step": 2448 }, { "epoch": 0.5462859692170422, "grad_norm": 0.16449302434921265, "learning_rate": 1.8511704348946314e-05, "loss": 0.5043, "step": 2449 }, { "epoch": 0.5465090341289315, "grad_norm": 0.1623837649822235, "learning_rate": 1.85104688231763e-05, "loss": 0.4831, "step": 2450 }, { "epoch": 0.5467320990408209, "grad_norm": 0.16712717711925507, "learning_rate": 1.8509232826042983e-05, "loss": 0.5146, "step": 2451 }, { "epoch": 0.5469551639527103, "grad_norm": 0.14933447539806366, "learning_rate": 1.850799635761483e-05, "loss": 0.5011, "step": 2452 }, { "epoch": 0.5471782288645995, "grad_norm": 0.17392055690288544, "learning_rate": 1.8506759417960322e-05, "loss": 0.482, "step": 2453 }, { "epoch": 0.5474012937764889, "grad_norm": 0.1616058647632599, "learning_rate": 1.850552200714797e-05, "loss": 0.5152, "step": 2454 }, { "epoch": 0.5476243586883783, "grad_norm": 0.16296276450157166, "learning_rate": 1.8504284125246304e-05, "loss": 0.5073, "step": 2455 }, { "epoch": 0.5478474236002677, "grad_norm": 0.16247029602527618, "learning_rate": 1.850304577232389e-05, "loss": 0.5194, "step": 2456 }, { "epoch": 0.548070488512157, "grad_norm": 0.16784507036209106, "learning_rate": 1.8501806948449316e-05, "loss": 0.5212, "step": 2457 }, { "epoch": 0.5482935534240464, "grad_norm": 0.1538800299167633, "learning_rate": 1.8500567653691192e-05, "loss": 0.4907, "step": 2458 }, { "epoch": 0.5485166183359358, "grad_norm": 0.1586543768644333, "learning_rate": 1.8499327888118163e-05, "loss": 0.4873, "step": 2459 }, { "epoch": 0.5487396832478251, "grad_norm": 0.15387628972530365, "learning_rate": 1.8498087651798893e-05, "loss": 0.5102, "step": 2460 }, { "epoch": 0.5489627481597145, "grad_norm": 0.15858915448188782, "learning_rate": 1.8496846944802072e-05, "loss": 0.4983, "step": 2461 }, { "epoch": 0.5491858130716039, "grad_norm": 0.15983784198760986, "learning_rate": 1.849560576719642e-05, "loss": 0.4728, "step": 2462 }, { "epoch": 0.5494088779834932, "grad_norm": 0.1605166792869568, "learning_rate": 1.849436411905068e-05, "loss": 0.5083, "step": 2463 }, { "epoch": 0.5496319428953825, "grad_norm": 0.15921124815940857, "learning_rate": 1.8493122000433628e-05, "loss": 0.5054, "step": 2464 }, { "epoch": 0.5498550078072719, "grad_norm": 0.15854284167289734, "learning_rate": 1.849187941141405e-05, "loss": 0.5111, "step": 2465 }, { "epoch": 0.5500780727191613, "grad_norm": 0.16846506297588348, "learning_rate": 1.8490636352060778e-05, "loss": 0.526, "step": 2466 }, { "epoch": 0.5503011376310506, "grad_norm": 0.1619185209274292, "learning_rate": 1.8489392822442657e-05, "loss": 0.4965, "step": 2467 }, { "epoch": 0.55052420254294, "grad_norm": 0.17305007576942444, "learning_rate": 1.8488148822628557e-05, "loss": 0.5171, "step": 2468 }, { "epoch": 0.5507472674548294, "grad_norm": 0.16538777947425842, "learning_rate": 1.8486904352687384e-05, "loss": 0.5187, "step": 2469 }, { "epoch": 0.5509703323667187, "grad_norm": 0.1914980560541153, "learning_rate": 1.8485659412688065e-05, "loss": 0.5105, "step": 2470 }, { "epoch": 0.551193397278608, "grad_norm": 0.16565662622451782, "learning_rate": 1.8484414002699552e-05, "loss": 0.4949, "step": 2471 }, { "epoch": 0.5514164621904974, "grad_norm": 0.17171315848827362, "learning_rate": 1.848316812279082e-05, "loss": 0.507, "step": 2472 }, { "epoch": 0.5516395271023868, "grad_norm": 0.1598774790763855, "learning_rate": 1.8481921773030878e-05, "loss": 0.5101, "step": 2473 }, { "epoch": 0.5518625920142761, "grad_norm": 0.16791561245918274, "learning_rate": 1.8480674953488752e-05, "loss": 0.4902, "step": 2474 }, { "epoch": 0.5520856569261655, "grad_norm": 0.17411862313747406, "learning_rate": 1.8479427664233505e-05, "loss": 0.5017, "step": 2475 }, { "epoch": 0.5523087218380549, "grad_norm": 0.16108182072639465, "learning_rate": 1.8478179905334213e-05, "loss": 0.4886, "step": 2476 }, { "epoch": 0.5525317867499442, "grad_norm": 0.1858782172203064, "learning_rate": 1.847693167685999e-05, "loss": 0.5047, "step": 2477 }, { "epoch": 0.5527548516618336, "grad_norm": 0.17761607468128204, "learning_rate": 1.847568297887997e-05, "loss": 0.5233, "step": 2478 }, { "epoch": 0.552977916573723, "grad_norm": 0.1658019870519638, "learning_rate": 1.8474433811463307e-05, "loss": 0.5263, "step": 2479 }, { "epoch": 0.5532009814856124, "grad_norm": 1.4999321699142456, "learning_rate": 1.84731841746792e-05, "loss": 0.5359, "step": 2480 }, { "epoch": 0.5534240463975016, "grad_norm": 0.17992182075977325, "learning_rate": 1.847193406859685e-05, "loss": 0.5033, "step": 2481 }, { "epoch": 0.553647111309391, "grad_norm": 0.17310257256031036, "learning_rate": 1.8470683493285503e-05, "loss": 0.4905, "step": 2482 }, { "epoch": 0.5538701762212804, "grad_norm": 0.16551616787910461, "learning_rate": 1.846943244881442e-05, "loss": 0.4925, "step": 2483 }, { "epoch": 0.5540932411331697, "grad_norm": 0.17117410898208618, "learning_rate": 1.846818093525289e-05, "loss": 0.5128, "step": 2484 }, { "epoch": 0.5543163060450591, "grad_norm": 0.15640629827976227, "learning_rate": 1.8466928952670242e-05, "loss": 0.5054, "step": 2485 }, { "epoch": 0.5545393709569485, "grad_norm": 0.16013699769973755, "learning_rate": 1.8465676501135804e-05, "loss": 0.4966, "step": 2486 }, { "epoch": 0.5547624358688378, "grad_norm": 0.16586218774318695, "learning_rate": 1.846442358071895e-05, "loss": 0.506, "step": 2487 }, { "epoch": 0.5549855007807272, "grad_norm": 0.1749541014432907, "learning_rate": 1.8463170191489075e-05, "loss": 0.4733, "step": 2488 }, { "epoch": 0.5552085656926166, "grad_norm": 0.16143982112407684, "learning_rate": 1.84619163335156e-05, "loss": 0.5014, "step": 2489 }, { "epoch": 0.555431630604506, "grad_norm": 0.1761699765920639, "learning_rate": 1.846066200686797e-05, "loss": 0.5305, "step": 2490 }, { "epoch": 0.5556546955163952, "grad_norm": 0.25962182879447937, "learning_rate": 1.8459407211615658e-05, "loss": 0.5047, "step": 2491 }, { "epoch": 0.5558777604282846, "grad_norm": 0.15808863937854767, "learning_rate": 1.8458151947828165e-05, "loss": 0.5079, "step": 2492 }, { "epoch": 0.556100825340174, "grad_norm": 0.1679726392030716, "learning_rate": 1.8456896215575013e-05, "loss": 0.5097, "step": 2493 }, { "epoch": 0.5563238902520633, "grad_norm": 0.15282638370990753, "learning_rate": 1.845564001492575e-05, "loss": 0.5196, "step": 2494 }, { "epoch": 0.5565469551639527, "grad_norm": 0.1774146854877472, "learning_rate": 1.8454383345949954e-05, "loss": 0.5227, "step": 2495 }, { "epoch": 0.5567700200758421, "grad_norm": 0.1605686992406845, "learning_rate": 1.8453126208717235e-05, "loss": 0.4751, "step": 2496 }, { "epoch": 0.5569930849877315, "grad_norm": 0.16172072291374207, "learning_rate": 1.845186860329721e-05, "loss": 0.5158, "step": 2497 }, { "epoch": 0.5572161498996208, "grad_norm": 0.16925646364688873, "learning_rate": 1.8450610529759535e-05, "loss": 0.5148, "step": 2498 }, { "epoch": 0.5574392148115102, "grad_norm": 0.16925497353076935, "learning_rate": 1.8449351988173894e-05, "loss": 0.4947, "step": 2499 }, { "epoch": 0.5576622797233995, "grad_norm": 1.354637861251831, "learning_rate": 1.8448092978609993e-05, "loss": 0.5136, "step": 2500 }, { "epoch": 0.5578853446352888, "grad_norm": 0.16736604273319244, "learning_rate": 1.844683350113756e-05, "loss": 0.5389, "step": 2501 }, { "epoch": 0.5581084095471782, "grad_norm": 0.16498203575611115, "learning_rate": 1.8445573555826355e-05, "loss": 0.5018, "step": 2502 }, { "epoch": 0.5583314744590676, "grad_norm": 0.17059291899204254, "learning_rate": 1.8444313142746164e-05, "loss": 0.5282, "step": 2503 }, { "epoch": 0.558554539370957, "grad_norm": 0.21622738242149353, "learning_rate": 1.844305226196679e-05, "loss": 0.4923, "step": 2504 }, { "epoch": 0.5587776042828463, "grad_norm": 0.16687920689582825, "learning_rate": 1.844179091355808e-05, "loss": 0.4998, "step": 2505 }, { "epoch": 0.5590006691947357, "grad_norm": 0.1704476922750473, "learning_rate": 1.8440529097589885e-05, "loss": 0.5267, "step": 2506 }, { "epoch": 0.5592237341066251, "grad_norm": 0.16384254395961761, "learning_rate": 1.8439266814132092e-05, "loss": 0.5053, "step": 2507 }, { "epoch": 0.5594467990185144, "grad_norm": 0.16077467799186707, "learning_rate": 1.843800406325462e-05, "loss": 0.5151, "step": 2508 }, { "epoch": 0.5596698639304037, "grad_norm": 0.16944189369678497, "learning_rate": 1.843674084502741e-05, "loss": 0.5054, "step": 2509 }, { "epoch": 0.5598929288422931, "grad_norm": 0.16107740998268127, "learning_rate": 1.8435477159520418e-05, "loss": 0.4861, "step": 2510 }, { "epoch": 0.5601159937541824, "grad_norm": 0.17457795143127441, "learning_rate": 1.843421300680364e-05, "loss": 0.486, "step": 2511 }, { "epoch": 0.5603390586660718, "grad_norm": 0.16926448047161102, "learning_rate": 1.8432948386947092e-05, "loss": 0.5238, "step": 2512 }, { "epoch": 0.5605621235779612, "grad_norm": 0.16914579272270203, "learning_rate": 1.8431683300020817e-05, "loss": 0.5021, "step": 2513 }, { "epoch": 0.5607851884898506, "grad_norm": 0.16727939248085022, "learning_rate": 1.8430417746094886e-05, "loss": 0.53, "step": 2514 }, { "epoch": 0.5610082534017399, "grad_norm": 0.1788937747478485, "learning_rate": 1.842915172523939e-05, "loss": 0.5226, "step": 2515 }, { "epoch": 0.5612313183136293, "grad_norm": 0.17947039008140564, "learning_rate": 1.8427885237524446e-05, "loss": 0.4914, "step": 2516 }, { "epoch": 0.5614543832255187, "grad_norm": 0.17127928137779236, "learning_rate": 1.842661828302021e-05, "loss": 0.4802, "step": 2517 }, { "epoch": 0.5616774481374079, "grad_norm": 0.20038791000843048, "learning_rate": 1.8425350861796845e-05, "loss": 0.4911, "step": 2518 }, { "epoch": 0.5619005130492973, "grad_norm": 0.16238313913345337, "learning_rate": 1.842408297392455e-05, "loss": 0.492, "step": 2519 }, { "epoch": 0.5621235779611867, "grad_norm": 0.17273001372814178, "learning_rate": 1.8422814619473556e-05, "loss": 0.555, "step": 2520 }, { "epoch": 0.5623466428730761, "grad_norm": 0.19861721992492676, "learning_rate": 1.84215457985141e-05, "loss": 0.5245, "step": 2521 }, { "epoch": 0.5625697077849654, "grad_norm": 0.15922409296035767, "learning_rate": 1.8420276511116467e-05, "loss": 0.4968, "step": 2522 }, { "epoch": 0.5627927726968548, "grad_norm": 0.15657752752304077, "learning_rate": 1.8419006757350956e-05, "loss": 0.4923, "step": 2523 }, { "epoch": 0.5630158376087442, "grad_norm": 0.1599670946598053, "learning_rate": 1.8417736537287893e-05, "loss": 0.5381, "step": 2524 }, { "epoch": 0.5632389025206335, "grad_norm": 0.16177695989608765, "learning_rate": 1.841646585099763e-05, "loss": 0.5025, "step": 2525 }, { "epoch": 0.5634619674325229, "grad_norm": 0.17016616463661194, "learning_rate": 1.8415194698550548e-05, "loss": 0.5024, "step": 2526 }, { "epoch": 0.5636850323444123, "grad_norm": 0.15452872216701508, "learning_rate": 1.8413923080017047e-05, "loss": 0.4838, "step": 2527 }, { "epoch": 0.5639080972563015, "grad_norm": 0.17372727394104004, "learning_rate": 1.8412650995467564e-05, "loss": 0.5272, "step": 2528 }, { "epoch": 0.5641311621681909, "grad_norm": 0.20483483374118805, "learning_rate": 1.8411378444972548e-05, "loss": 0.4984, "step": 2529 }, { "epoch": 0.5643542270800803, "grad_norm": 0.16677002608776093, "learning_rate": 1.8410105428602485e-05, "loss": 0.5217, "step": 2530 }, { "epoch": 0.5645772919919697, "grad_norm": 0.16841678321361542, "learning_rate": 1.840883194642788e-05, "loss": 0.5331, "step": 2531 }, { "epoch": 0.564800356903859, "grad_norm": 0.16345760226249695, "learning_rate": 1.8407557998519273e-05, "loss": 0.5372, "step": 2532 }, { "epoch": 0.5650234218157484, "grad_norm": 0.16982464492321014, "learning_rate": 1.840628358494721e-05, "loss": 0.4795, "step": 2533 }, { "epoch": 0.5652464867276378, "grad_norm": 0.16752153635025024, "learning_rate": 1.840500870578229e-05, "loss": 0.5088, "step": 2534 }, { "epoch": 0.5654695516395271, "grad_norm": 0.16022369265556335, "learning_rate": 1.840373336109512e-05, "loss": 0.4676, "step": 2535 }, { "epoch": 0.5656926165514164, "grad_norm": 0.15749748051166534, "learning_rate": 1.8402457550956336e-05, "loss": 0.5325, "step": 2536 }, { "epoch": 0.5659156814633058, "grad_norm": 0.16572564840316772, "learning_rate": 1.8401181275436596e-05, "loss": 0.4972, "step": 2537 }, { "epoch": 0.5661387463751952, "grad_norm": 0.15949216485023499, "learning_rate": 1.839990453460659e-05, "loss": 0.5088, "step": 2538 }, { "epoch": 0.5663618112870845, "grad_norm": 0.15577349066734314, "learning_rate": 1.8398627328537037e-05, "loss": 0.4871, "step": 2539 }, { "epoch": 0.5665848761989739, "grad_norm": 0.22663244605064392, "learning_rate": 1.839734965729867e-05, "loss": 0.51, "step": 2540 }, { "epoch": 0.5668079411108633, "grad_norm": 0.16689926385879517, "learning_rate": 1.8396071520962256e-05, "loss": 0.4998, "step": 2541 }, { "epoch": 0.5670310060227526, "grad_norm": 0.16619545221328735, "learning_rate": 1.8394792919598592e-05, "loss": 0.5208, "step": 2542 }, { "epoch": 0.567254070934642, "grad_norm": 0.15518589317798615, "learning_rate": 1.8393513853278492e-05, "loss": 0.4967, "step": 2543 }, { "epoch": 0.5674771358465314, "grad_norm": 0.19833387434482574, "learning_rate": 1.8392234322072792e-05, "loss": 0.5029, "step": 2544 }, { "epoch": 0.5677002007584206, "grad_norm": 0.15093770623207092, "learning_rate": 1.839095432605237e-05, "loss": 0.5017, "step": 2545 }, { "epoch": 0.56792326567031, "grad_norm": 0.19351163506507874, "learning_rate": 1.8389673865288114e-05, "loss": 0.494, "step": 2546 }, { "epoch": 0.5681463305821994, "grad_norm": 0.16114504635334015, "learning_rate": 1.8388392939850946e-05, "loss": 0.5118, "step": 2547 }, { "epoch": 0.5683693954940888, "grad_norm": 0.16146999597549438, "learning_rate": 1.8387111549811812e-05, "loss": 0.4732, "step": 2548 }, { "epoch": 0.5685924604059781, "grad_norm": 0.1651589721441269, "learning_rate": 1.8385829695241687e-05, "loss": 0.5086, "step": 2549 }, { "epoch": 0.5688155253178675, "grad_norm": 0.15596236288547516, "learning_rate": 1.838454737621156e-05, "loss": 0.5017, "step": 2550 }, { "epoch": 0.5690385902297569, "grad_norm": 0.1615191251039505, "learning_rate": 1.838326459279246e-05, "loss": 0.5212, "step": 2551 }, { "epoch": 0.5692616551416462, "grad_norm": 0.16675913333892822, "learning_rate": 1.8381981345055435e-05, "loss": 0.5229, "step": 2552 }, { "epoch": 0.5694847200535356, "grad_norm": 0.16236376762390137, "learning_rate": 1.8380697633071558e-05, "loss": 0.4955, "step": 2553 }, { "epoch": 0.569707784965425, "grad_norm": 0.17616188526153564, "learning_rate": 1.837941345691193e-05, "loss": 0.5067, "step": 2554 }, { "epoch": 0.5699308498773143, "grad_norm": 0.16186662018299103, "learning_rate": 1.8378128816647676e-05, "loss": 0.5054, "step": 2555 }, { "epoch": 0.5701539147892036, "grad_norm": 0.1674317866563797, "learning_rate": 1.8376843712349946e-05, "loss": 0.5009, "step": 2556 }, { "epoch": 0.570376979701093, "grad_norm": 0.17086781561374664, "learning_rate": 1.837555814408992e-05, "loss": 0.4998, "step": 2557 }, { "epoch": 0.5706000446129824, "grad_norm": 0.1624806523323059, "learning_rate": 1.8374272111938797e-05, "loss": 0.519, "step": 2558 }, { "epoch": 0.5708231095248717, "grad_norm": 0.1701088398694992, "learning_rate": 1.837298561596781e-05, "loss": 0.5173, "step": 2559 }, { "epoch": 0.5710461744367611, "grad_norm": 0.15940968692302704, "learning_rate": 1.8371698656248212e-05, "loss": 0.515, "step": 2560 }, { "epoch": 0.5712692393486505, "grad_norm": 0.17150092124938965, "learning_rate": 1.837041123285128e-05, "loss": 0.4826, "step": 2561 }, { "epoch": 0.5714923042605398, "grad_norm": 0.16845233738422394, "learning_rate": 1.836912334584833e-05, "loss": 0.5018, "step": 2562 }, { "epoch": 0.5717153691724292, "grad_norm": 0.18033871054649353, "learning_rate": 1.8367834995310676e-05, "loss": 0.5177, "step": 2563 }, { "epoch": 0.5719384340843185, "grad_norm": 0.17002245783805847, "learning_rate": 1.8366546181309686e-05, "loss": 0.5287, "step": 2564 }, { "epoch": 0.5721614989962079, "grad_norm": 0.16821357607841492, "learning_rate": 1.836525690391674e-05, "loss": 0.522, "step": 2565 }, { "epoch": 0.5723845639080972, "grad_norm": 0.1578892022371292, "learning_rate": 1.836396716320325e-05, "loss": 0.4929, "step": 2566 }, { "epoch": 0.5726076288199866, "grad_norm": 0.15691736340522766, "learning_rate": 1.836267695924065e-05, "loss": 0.4775, "step": 2567 }, { "epoch": 0.572830693731876, "grad_norm": 0.16604529321193695, "learning_rate": 1.8361386292100394e-05, "loss": 0.4854, "step": 2568 }, { "epoch": 0.5730537586437653, "grad_norm": 0.16472390294075012, "learning_rate": 1.8360095161853966e-05, "loss": 0.486, "step": 2569 }, { "epoch": 0.5732768235556547, "grad_norm": 0.1708393096923828, "learning_rate": 1.8358803568572885e-05, "loss": 0.5086, "step": 2570 }, { "epoch": 0.5734998884675441, "grad_norm": 0.1598411649465561, "learning_rate": 1.8357511512328683e-05, "loss": 0.5071, "step": 2571 }, { "epoch": 0.5737229533794335, "grad_norm": 0.19416102766990662, "learning_rate": 1.8356218993192922e-05, "loss": 0.5092, "step": 2572 }, { "epoch": 0.5739460182913227, "grad_norm": 0.1696127951145172, "learning_rate": 1.835492601123719e-05, "loss": 0.5157, "step": 2573 }, { "epoch": 0.5741690832032121, "grad_norm": 0.16586486995220184, "learning_rate": 1.8353632566533102e-05, "loss": 0.4948, "step": 2574 }, { "epoch": 0.5743921481151015, "grad_norm": 0.17883430421352386, "learning_rate": 1.8352338659152296e-05, "loss": 0.4989, "step": 2575 }, { "epoch": 0.5746152130269908, "grad_norm": 0.15909375250339508, "learning_rate": 1.8351044289166435e-05, "loss": 0.4892, "step": 2576 }, { "epoch": 0.5748382779388802, "grad_norm": 0.17203538119792938, "learning_rate": 1.834974945664721e-05, "loss": 0.5039, "step": 2577 }, { "epoch": 0.5750613428507696, "grad_norm": 0.17230457067489624, "learning_rate": 1.834845416166634e-05, "loss": 0.5102, "step": 2578 }, { "epoch": 0.575284407762659, "grad_norm": 0.16863304376602173, "learning_rate": 1.8347158404295566e-05, "loss": 0.5365, "step": 2579 }, { "epoch": 0.5755074726745483, "grad_norm": 0.15985195338726044, "learning_rate": 1.8345862184606653e-05, "loss": 0.4991, "step": 2580 }, { "epoch": 0.5757305375864377, "grad_norm": 0.16000472009181976, "learning_rate": 1.8344565502671396e-05, "loss": 0.4898, "step": 2581 }, { "epoch": 0.575953602498327, "grad_norm": 0.16451792418956757, "learning_rate": 1.8343268358561607e-05, "loss": 0.5382, "step": 2582 }, { "epoch": 0.5761766674102163, "grad_norm": 0.16820542514324188, "learning_rate": 1.834197075234914e-05, "loss": 0.4993, "step": 2583 }, { "epoch": 0.5763997323221057, "grad_norm": 0.16512157022953033, "learning_rate": 1.834067268410586e-05, "loss": 0.5139, "step": 2584 }, { "epoch": 0.5766227972339951, "grad_norm": 0.16488297283649445, "learning_rate": 1.833937415390366e-05, "loss": 0.5371, "step": 2585 }, { "epoch": 0.5768458621458844, "grad_norm": 0.1621064841747284, "learning_rate": 1.8338075161814462e-05, "loss": 0.5218, "step": 2586 }, { "epoch": 0.5770689270577738, "grad_norm": 0.1790938526391983, "learning_rate": 1.8336775707910214e-05, "loss": 0.5357, "step": 2587 }, { "epoch": 0.5772919919696632, "grad_norm": 0.16097491979599, "learning_rate": 1.8335475792262888e-05, "loss": 0.4874, "step": 2588 }, { "epoch": 0.5775150568815526, "grad_norm": 0.17577522993087769, "learning_rate": 1.8334175414944476e-05, "loss": 0.5097, "step": 2589 }, { "epoch": 0.5777381217934419, "grad_norm": 0.17199388146400452, "learning_rate": 1.833287457602701e-05, "loss": 0.4969, "step": 2590 }, { "epoch": 0.5779611867053313, "grad_norm": 0.16499276459217072, "learning_rate": 1.833157327558253e-05, "loss": 0.5207, "step": 2591 }, { "epoch": 0.5781842516172206, "grad_norm": 0.16708426177501678, "learning_rate": 1.8330271513683118e-05, "loss": 0.5077, "step": 2592 }, { "epoch": 0.5784073165291099, "grad_norm": 0.1643514633178711, "learning_rate": 1.8328969290400867e-05, "loss": 0.4884, "step": 2593 }, { "epoch": 0.5786303814409993, "grad_norm": 0.1548931747674942, "learning_rate": 1.832766660580791e-05, "loss": 0.5047, "step": 2594 }, { "epoch": 0.5788534463528887, "grad_norm": 0.16089920699596405, "learning_rate": 1.832636345997639e-05, "loss": 0.4916, "step": 2595 }, { "epoch": 0.5790765112647781, "grad_norm": 0.15845981240272522, "learning_rate": 1.8325059852978485e-05, "loss": 0.4832, "step": 2596 }, { "epoch": 0.5792995761766674, "grad_norm": 0.1613757461309433, "learning_rate": 1.83237557848864e-05, "loss": 0.5192, "step": 2597 }, { "epoch": 0.5795226410885568, "grad_norm": 0.15937237441539764, "learning_rate": 1.8322451255772365e-05, "loss": 0.5028, "step": 2598 }, { "epoch": 0.5797457060004462, "grad_norm": 0.16132612526416779, "learning_rate": 1.8321146265708627e-05, "loss": 0.4948, "step": 2599 }, { "epoch": 0.5799687709123355, "grad_norm": 0.18276172876358032, "learning_rate": 1.8319840814767463e-05, "loss": 0.4845, "step": 2600 }, { "epoch": 0.5801918358242248, "grad_norm": 0.1661817729473114, "learning_rate": 1.8318534903021182e-05, "loss": 0.5344, "step": 2601 }, { "epoch": 0.5804149007361142, "grad_norm": 0.15862146019935608, "learning_rate": 1.8317228530542117e-05, "loss": 0.5043, "step": 2602 }, { "epoch": 0.5806379656480035, "grad_norm": 0.15994656085968018, "learning_rate": 1.8315921697402618e-05, "loss": 0.4765, "step": 2603 }, { "epoch": 0.5808610305598929, "grad_norm": 0.16988401114940643, "learning_rate": 1.8314614403675063e-05, "loss": 0.5194, "step": 2604 }, { "epoch": 0.5810840954717823, "grad_norm": 0.16828812658786774, "learning_rate": 1.831330664943186e-05, "loss": 0.5357, "step": 2605 }, { "epoch": 0.5813071603836717, "grad_norm": 0.19693851470947266, "learning_rate": 1.8311998434745445e-05, "loss": 0.4976, "step": 2606 }, { "epoch": 0.581530225295561, "grad_norm": 0.1689445823431015, "learning_rate": 1.831068975968827e-05, "loss": 0.4992, "step": 2607 }, { "epoch": 0.5817532902074504, "grad_norm": 0.15751276910305023, "learning_rate": 1.830938062433282e-05, "loss": 0.4711, "step": 2608 }, { "epoch": 0.5819763551193398, "grad_norm": 0.16275669634342194, "learning_rate": 1.8308071028751608e-05, "loss": 0.5184, "step": 2609 }, { "epoch": 0.582199420031229, "grad_norm": 0.16536571085453033, "learning_rate": 1.8306760973017158e-05, "loss": 0.5172, "step": 2610 }, { "epoch": 0.5824224849431184, "grad_norm": 0.18836647272109985, "learning_rate": 1.830545045720203e-05, "loss": 0.5019, "step": 2611 }, { "epoch": 0.5826455498550078, "grad_norm": 0.16014382243156433, "learning_rate": 1.830413948137882e-05, "loss": 0.5182, "step": 2612 }, { "epoch": 0.5828686147668972, "grad_norm": 0.1588478833436966, "learning_rate": 1.8302828045620128e-05, "loss": 0.5072, "step": 2613 }, { "epoch": 0.5830916796787865, "grad_norm": 0.17403388023376465, "learning_rate": 1.830151614999859e-05, "loss": 0.5093, "step": 2614 }, { "epoch": 0.5833147445906759, "grad_norm": 0.17456629872322083, "learning_rate": 1.830020379458687e-05, "loss": 0.4922, "step": 2615 }, { "epoch": 0.5835378095025653, "grad_norm": 0.15623332560062408, "learning_rate": 1.829889097945765e-05, "loss": 0.5133, "step": 2616 }, { "epoch": 0.5837608744144546, "grad_norm": 0.18376080691814423, "learning_rate": 1.8297577704683653e-05, "loss": 0.5154, "step": 2617 }, { "epoch": 0.583983939326344, "grad_norm": 0.17060859501361847, "learning_rate": 1.8296263970337602e-05, "loss": 0.5058, "step": 2618 }, { "epoch": 0.5842070042382334, "grad_norm": 0.1654299646615982, "learning_rate": 1.829494977649227e-05, "loss": 0.5072, "step": 2619 }, { "epoch": 0.5844300691501226, "grad_norm": 0.1547902524471283, "learning_rate": 1.829363512322044e-05, "loss": 0.4967, "step": 2620 }, { "epoch": 0.584653134062012, "grad_norm": 0.1600826382637024, "learning_rate": 1.829232001059493e-05, "loss": 0.5396, "step": 2621 }, { "epoch": 0.5848761989739014, "grad_norm": 0.16009269654750824, "learning_rate": 1.8291004438688578e-05, "loss": 0.525, "step": 2622 }, { "epoch": 0.5850992638857908, "grad_norm": 0.2055627703666687, "learning_rate": 1.8289688407574246e-05, "loss": 0.4993, "step": 2623 }, { "epoch": 0.5853223287976801, "grad_norm": 0.1680591106414795, "learning_rate": 1.8288371917324827e-05, "loss": 0.5184, "step": 2624 }, { "epoch": 0.5855453937095695, "grad_norm": 0.1637965738773346, "learning_rate": 1.828705496801323e-05, "loss": 0.5205, "step": 2625 }, { "epoch": 0.5857684586214589, "grad_norm": 0.172703817486763, "learning_rate": 1.828573755971241e-05, "loss": 0.5201, "step": 2626 }, { "epoch": 0.5859915235333482, "grad_norm": 0.18580362200737, "learning_rate": 1.8284419692495316e-05, "loss": 0.5246, "step": 2627 }, { "epoch": 0.5862145884452375, "grad_norm": 0.16274447739124298, "learning_rate": 1.8283101366434954e-05, "loss": 0.5172, "step": 2628 }, { "epoch": 0.5864376533571269, "grad_norm": 0.16170893609523773, "learning_rate": 1.8281782581604334e-05, "loss": 0.506, "step": 2629 }, { "epoch": 0.5866607182690163, "grad_norm": 0.15339437127113342, "learning_rate": 1.82804633380765e-05, "loss": 0.4706, "step": 2630 }, { "epoch": 0.5868837831809056, "grad_norm": 0.17161330580711365, "learning_rate": 1.827914363592452e-05, "loss": 0.5217, "step": 2631 }, { "epoch": 0.587106848092795, "grad_norm": 0.16379369795322418, "learning_rate": 1.8277823475221485e-05, "loss": 0.4954, "step": 2632 }, { "epoch": 0.5873299130046844, "grad_norm": 0.16465184092521667, "learning_rate": 1.827650285604052e-05, "loss": 0.4995, "step": 2633 }, { "epoch": 0.5875529779165737, "grad_norm": 0.1681264042854309, "learning_rate": 1.8275181778454767e-05, "loss": 0.5391, "step": 2634 }, { "epoch": 0.5877760428284631, "grad_norm": 0.17729119956493378, "learning_rate": 1.827386024253739e-05, "loss": 0.5313, "step": 2635 }, { "epoch": 0.5879991077403525, "grad_norm": 0.1703905314207077, "learning_rate": 1.8272538248361592e-05, "loss": 0.5407, "step": 2636 }, { "epoch": 0.5882221726522417, "grad_norm": 0.1546456664800644, "learning_rate": 1.8271215796000588e-05, "loss": 0.4965, "step": 2637 }, { "epoch": 0.5884452375641311, "grad_norm": 0.15894514322280884, "learning_rate": 1.8269892885527624e-05, "loss": 0.498, "step": 2638 }, { "epoch": 0.5886683024760205, "grad_norm": 0.15367206931114197, "learning_rate": 1.826856951701597e-05, "loss": 0.482, "step": 2639 }, { "epoch": 0.5888913673879099, "grad_norm": 0.16719990968704224, "learning_rate": 1.826724569053893e-05, "loss": 0.5287, "step": 2640 }, { "epoch": 0.5891144322997992, "grad_norm": 0.1742510050535202, "learning_rate": 1.8265921406169816e-05, "loss": 0.4773, "step": 2641 }, { "epoch": 0.5893374972116886, "grad_norm": 0.16305537521839142, "learning_rate": 1.8264596663981985e-05, "loss": 0.47, "step": 2642 }, { "epoch": 0.589560562123578, "grad_norm": 0.1746242344379425, "learning_rate": 1.82632714640488e-05, "loss": 0.4978, "step": 2643 }, { "epoch": 0.5897836270354673, "grad_norm": 0.16970300674438477, "learning_rate": 1.8261945806443666e-05, "loss": 0.5225, "step": 2644 }, { "epoch": 0.5900066919473567, "grad_norm": 0.16862879693508148, "learning_rate": 1.826061969124e-05, "loss": 0.5299, "step": 2645 }, { "epoch": 0.5902297568592461, "grad_norm": 0.1616392582654953, "learning_rate": 1.825929311851126e-05, "loss": 0.5237, "step": 2646 }, { "epoch": 0.5904528217711354, "grad_norm": 0.15673169493675232, "learning_rate": 1.8257966088330907e-05, "loss": 0.5005, "step": 2647 }, { "epoch": 0.5906758866830247, "grad_norm": 0.16055789589881897, "learning_rate": 1.825663860077245e-05, "loss": 0.5179, "step": 2648 }, { "epoch": 0.5908989515949141, "grad_norm": 0.1535293161869049, "learning_rate": 1.8255310655909414e-05, "loss": 0.4926, "step": 2649 }, { "epoch": 0.5911220165068035, "grad_norm": 0.16450564563274384, "learning_rate": 1.8253982253815343e-05, "loss": 0.5016, "step": 2650 }, { "epoch": 0.5913450814186928, "grad_norm": 0.1581798493862152, "learning_rate": 1.8252653394563814e-05, "loss": 0.5031, "step": 2651 }, { "epoch": 0.5915681463305822, "grad_norm": 0.16955383121967316, "learning_rate": 1.825132407822843e-05, "loss": 0.5408, "step": 2652 }, { "epoch": 0.5917912112424716, "grad_norm": 0.16490530967712402, "learning_rate": 1.8249994304882818e-05, "loss": 0.5352, "step": 2653 }, { "epoch": 0.592014276154361, "grad_norm": 0.1716136932373047, "learning_rate": 1.8248664074600626e-05, "loss": 0.4936, "step": 2654 }, { "epoch": 0.5922373410662503, "grad_norm": 0.16612227261066437, "learning_rate": 1.8247333387455534e-05, "loss": 0.4886, "step": 2655 }, { "epoch": 0.5924604059781396, "grad_norm": 0.16571937501430511, "learning_rate": 1.8246002243521234e-05, "loss": 0.5171, "step": 2656 }, { "epoch": 0.592683470890029, "grad_norm": 0.15452441573143005, "learning_rate": 1.8244670642871464e-05, "loss": 0.4825, "step": 2657 }, { "epoch": 0.5929065358019183, "grad_norm": 0.16785788536071777, "learning_rate": 1.8243338585579974e-05, "loss": 0.4993, "step": 2658 }, { "epoch": 0.5931296007138077, "grad_norm": 0.16040126979351044, "learning_rate": 1.824200607172054e-05, "loss": 0.498, "step": 2659 }, { "epoch": 0.5933526656256971, "grad_norm": 0.16780081391334534, "learning_rate": 1.8240673101366963e-05, "loss": 0.509, "step": 2660 }, { "epoch": 0.5935757305375864, "grad_norm": 0.16159792244434357, "learning_rate": 1.823933967459308e-05, "loss": 0.5054, "step": 2661 }, { "epoch": 0.5937987954494758, "grad_norm": 0.1585383266210556, "learning_rate": 1.823800579147273e-05, "loss": 0.4861, "step": 2662 }, { "epoch": 0.5940218603613652, "grad_norm": 0.1630701869726181, "learning_rate": 1.8236671452079805e-05, "loss": 0.5226, "step": 2663 }, { "epoch": 0.5942449252732546, "grad_norm": 0.16565611958503723, "learning_rate": 1.8235336656488203e-05, "loss": 0.4905, "step": 2664 }, { "epoch": 0.5944679901851438, "grad_norm": 0.15455248951911926, "learning_rate": 1.8234001404771856e-05, "loss": 0.4844, "step": 2665 }, { "epoch": 0.5946910550970332, "grad_norm": 0.1679016500711441, "learning_rate": 1.8232665697004713e-05, "loss": 0.5312, "step": 2666 }, { "epoch": 0.5949141200089226, "grad_norm": 0.16260258853435516, "learning_rate": 1.823132953326076e-05, "loss": 0.5106, "step": 2667 }, { "epoch": 0.5951371849208119, "grad_norm": 0.15242820978164673, "learning_rate": 1.8229992913614004e-05, "loss": 0.4641, "step": 2668 }, { "epoch": 0.5953602498327013, "grad_norm": 0.17028586566448212, "learning_rate": 1.822865583813847e-05, "loss": 0.5183, "step": 2669 }, { "epoch": 0.5955833147445907, "grad_norm": 0.14964601397514343, "learning_rate": 1.8227318306908216e-05, "loss": 0.4843, "step": 2670 }, { "epoch": 0.5958063796564801, "grad_norm": 0.1675204187631607, "learning_rate": 1.822598031999732e-05, "loss": 0.5098, "step": 2671 }, { "epoch": 0.5960294445683694, "grad_norm": 0.15445132553577423, "learning_rate": 1.822464187747989e-05, "loss": 0.4759, "step": 2672 }, { "epoch": 0.5962525094802588, "grad_norm": 0.1613750010728836, "learning_rate": 1.822330297943006e-05, "loss": 0.5154, "step": 2673 }, { "epoch": 0.5964755743921482, "grad_norm": 0.16264407336711884, "learning_rate": 1.8221963625921984e-05, "loss": 0.4758, "step": 2674 }, { "epoch": 0.5966986393040374, "grad_norm": 0.16100256145000458, "learning_rate": 1.8220623817029843e-05, "loss": 0.4946, "step": 2675 }, { "epoch": 0.5969217042159268, "grad_norm": 0.1642381250858307, "learning_rate": 1.8219283552827847e-05, "loss": 0.5029, "step": 2676 }, { "epoch": 0.5971447691278162, "grad_norm": 0.16491542756557465, "learning_rate": 1.8217942833390227e-05, "loss": 0.5077, "step": 2677 }, { "epoch": 0.5973678340397055, "grad_norm": 0.1611475944519043, "learning_rate": 1.821660165879124e-05, "loss": 0.5346, "step": 2678 }, { "epoch": 0.5975908989515949, "grad_norm": 0.16811859607696533, "learning_rate": 1.8215260029105166e-05, "loss": 0.5173, "step": 2679 }, { "epoch": 0.5978139638634843, "grad_norm": 0.17261561751365662, "learning_rate": 1.8213917944406315e-05, "loss": 0.5142, "step": 2680 }, { "epoch": 0.5980370287753737, "grad_norm": 0.1671370267868042, "learning_rate": 1.8212575404769023e-05, "loss": 0.5289, "step": 2681 }, { "epoch": 0.598260093687263, "grad_norm": 0.16574646532535553, "learning_rate": 1.8211232410267645e-05, "loss": 0.5179, "step": 2682 }, { "epoch": 0.5984831585991524, "grad_norm": 0.1583874672651291, "learning_rate": 1.8209888960976565e-05, "loss": 0.503, "step": 2683 }, { "epoch": 0.5987062235110417, "grad_norm": 0.15771937370300293, "learning_rate": 1.8208545056970193e-05, "loss": 0.4811, "step": 2684 }, { "epoch": 0.598929288422931, "grad_norm": 0.16496342420578003, "learning_rate": 1.820720069832296e-05, "loss": 0.5178, "step": 2685 }, { "epoch": 0.5991523533348204, "grad_norm": 0.1662297397851944, "learning_rate": 1.820585588510933e-05, "loss": 0.5286, "step": 2686 }, { "epoch": 0.5993754182467098, "grad_norm": 0.15326498448848724, "learning_rate": 1.8204510617403785e-05, "loss": 0.4983, "step": 2687 }, { "epoch": 0.5995984831585992, "grad_norm": 0.1545604169368744, "learning_rate": 1.820316489528083e-05, "loss": 0.4746, "step": 2688 }, { "epoch": 0.5998215480704885, "grad_norm": 0.16388210654258728, "learning_rate": 1.8201818718815004e-05, "loss": 0.4766, "step": 2689 }, { "epoch": 0.6000446129823779, "grad_norm": 0.16617290675640106, "learning_rate": 1.820047208808087e-05, "loss": 0.5301, "step": 2690 }, { "epoch": 0.6002676778942673, "grad_norm": 0.16141341626644135, "learning_rate": 1.8199125003153e-05, "loss": 0.5095, "step": 2691 }, { "epoch": 0.6004907428061566, "grad_norm": 0.1575475037097931, "learning_rate": 1.8197777464106022e-05, "loss": 0.5164, "step": 2692 }, { "epoch": 0.600713807718046, "grad_norm": 0.1886652559041977, "learning_rate": 1.8196429471014558e-05, "loss": 0.4922, "step": 2693 }, { "epoch": 0.6009368726299353, "grad_norm": 0.16036200523376465, "learning_rate": 1.8195081023953268e-05, "loss": 0.5116, "step": 2694 }, { "epoch": 0.6011599375418246, "grad_norm": 0.16269893944263458, "learning_rate": 1.8193732122996847e-05, "loss": 0.5135, "step": 2695 }, { "epoch": 0.601383002453714, "grad_norm": 0.1868923157453537, "learning_rate": 1.819238276822e-05, "loss": 0.5052, "step": 2696 }, { "epoch": 0.6016060673656034, "grad_norm": 0.16269947588443756, "learning_rate": 1.8191032959697464e-05, "loss": 0.4829, "step": 2697 }, { "epoch": 0.6018291322774928, "grad_norm": 0.1679493635892868, "learning_rate": 1.8189682697504e-05, "loss": 0.4967, "step": 2698 }, { "epoch": 0.6020521971893821, "grad_norm": 0.16471365094184875, "learning_rate": 1.8188331981714386e-05, "loss": 0.5189, "step": 2699 }, { "epoch": 0.6022752621012715, "grad_norm": 0.16562478244304657, "learning_rate": 1.8186980812403448e-05, "loss": 0.5217, "step": 2700 }, { "epoch": 0.6024983270131609, "grad_norm": 0.16145823895931244, "learning_rate": 1.818562918964601e-05, "loss": 0.5008, "step": 2701 }, { "epoch": 0.6027213919250501, "grad_norm": 0.17072793841362, "learning_rate": 1.8184277113516938e-05, "loss": 0.5302, "step": 2702 }, { "epoch": 0.6029444568369395, "grad_norm": 0.16824592649936676, "learning_rate": 1.8182924584091122e-05, "loss": 0.5358, "step": 2703 }, { "epoch": 0.6031675217488289, "grad_norm": 0.1661488264799118, "learning_rate": 1.8181571601443465e-05, "loss": 0.5391, "step": 2704 }, { "epoch": 0.6033905866607183, "grad_norm": 0.15754079818725586, "learning_rate": 1.8180218165648913e-05, "loss": 0.5013, "step": 2705 }, { "epoch": 0.6036136515726076, "grad_norm": 0.1813340038061142, "learning_rate": 1.817886427678242e-05, "loss": 0.5301, "step": 2706 }, { "epoch": 0.603836716484497, "grad_norm": 0.16588057577610016, "learning_rate": 1.817750993491898e-05, "loss": 0.4924, "step": 2707 }, { "epoch": 0.6040597813963864, "grad_norm": 0.17572402954101562, "learning_rate": 1.8176155140133596e-05, "loss": 0.5075, "step": 2708 }, { "epoch": 0.6042828463082757, "grad_norm": 0.157828688621521, "learning_rate": 1.8174799892501315e-05, "loss": 0.5133, "step": 2709 }, { "epoch": 0.6045059112201651, "grad_norm": 0.1617002636194229, "learning_rate": 1.817344419209719e-05, "loss": 0.5092, "step": 2710 }, { "epoch": 0.6047289761320545, "grad_norm": 0.1876181662082672, "learning_rate": 1.817208803899632e-05, "loss": 0.5326, "step": 2711 }, { "epoch": 0.6049520410439437, "grad_norm": 0.15636108815670013, "learning_rate": 1.8170731433273802e-05, "loss": 0.4922, "step": 2712 }, { "epoch": 0.6051751059558331, "grad_norm": 0.15785710513591766, "learning_rate": 1.8169374375004784e-05, "loss": 0.4956, "step": 2713 }, { "epoch": 0.6053981708677225, "grad_norm": 0.17369434237480164, "learning_rate": 1.8168016864264426e-05, "loss": 0.493, "step": 2714 }, { "epoch": 0.6056212357796119, "grad_norm": 0.16383057832717896, "learning_rate": 1.8166658901127915e-05, "loss": 0.4638, "step": 2715 }, { "epoch": 0.6058443006915012, "grad_norm": 0.17268022894859314, "learning_rate": 1.8165300485670464e-05, "loss": 0.5056, "step": 2716 }, { "epoch": 0.6060673656033906, "grad_norm": 0.1691250205039978, "learning_rate": 1.8163941617967313e-05, "loss": 0.5161, "step": 2717 }, { "epoch": 0.60629043051528, "grad_norm": 0.1628870815038681, "learning_rate": 1.8162582298093715e-05, "loss": 0.5015, "step": 2718 }, { "epoch": 0.6065134954271693, "grad_norm": 0.17182950675487518, "learning_rate": 1.816122252612497e-05, "loss": 0.5237, "step": 2719 }, { "epoch": 0.6067365603390587, "grad_norm": 0.17092610895633698, "learning_rate": 1.8159862302136386e-05, "loss": 0.4941, "step": 2720 }, { "epoch": 0.606959625250948, "grad_norm": 0.16498810052871704, "learning_rate": 1.8158501626203298e-05, "loss": 0.5197, "step": 2721 }, { "epoch": 0.6071826901628374, "grad_norm": 0.16107720136642456, "learning_rate": 1.815714049840107e-05, "loss": 0.5186, "step": 2722 }, { "epoch": 0.6074057550747267, "grad_norm": 0.1599961370229721, "learning_rate": 1.8155778918805095e-05, "loss": 0.5035, "step": 2723 }, { "epoch": 0.6076288199866161, "grad_norm": 0.16911983489990234, "learning_rate": 1.815441688749078e-05, "loss": 0.5009, "step": 2724 }, { "epoch": 0.6078518848985055, "grad_norm": 0.16482600569725037, "learning_rate": 1.8153054404533562e-05, "loss": 0.4991, "step": 2725 }, { "epoch": 0.6080749498103948, "grad_norm": 0.16007374227046967, "learning_rate": 1.8151691470008906e-05, "loss": 0.4837, "step": 2726 }, { "epoch": 0.6082980147222842, "grad_norm": 0.16472531855106354, "learning_rate": 1.81503280839923e-05, "loss": 0.4997, "step": 2727 }, { "epoch": 0.6085210796341736, "grad_norm": 0.1605815589427948, "learning_rate": 1.814896424655926e-05, "loss": 0.5011, "step": 2728 }, { "epoch": 0.608744144546063, "grad_norm": 0.16523852944374084, "learning_rate": 1.814759995778532e-05, "loss": 0.4947, "step": 2729 }, { "epoch": 0.6089672094579522, "grad_norm": 0.1722632497549057, "learning_rate": 1.8146235217746043e-05, "loss": 0.495, "step": 2730 }, { "epoch": 0.6091902743698416, "grad_norm": 0.16873040795326233, "learning_rate": 1.8144870026517018e-05, "loss": 0.5273, "step": 2731 }, { "epoch": 0.609413339281731, "grad_norm": 0.16078028082847595, "learning_rate": 1.8143504384173858e-05, "loss": 0.5051, "step": 2732 }, { "epoch": 0.6096364041936203, "grad_norm": 0.18267033994197845, "learning_rate": 1.8142138290792202e-05, "loss": 0.4946, "step": 2733 }, { "epoch": 0.6098594691055097, "grad_norm": 0.17000767588615417, "learning_rate": 1.814077174644771e-05, "loss": 0.5079, "step": 2734 }, { "epoch": 0.6100825340173991, "grad_norm": 0.1608540564775467, "learning_rate": 1.813940475121607e-05, "loss": 0.4912, "step": 2735 }, { "epoch": 0.6103055989292884, "grad_norm": 0.160196915268898, "learning_rate": 1.8138037305172997e-05, "loss": 0.5315, "step": 2736 }, { "epoch": 0.6105286638411778, "grad_norm": 0.16966816782951355, "learning_rate": 1.813666940839423e-05, "loss": 0.532, "step": 2737 }, { "epoch": 0.6107517287530672, "grad_norm": 0.19911563396453857, "learning_rate": 1.8135301060955525e-05, "loss": 0.4847, "step": 2738 }, { "epoch": 0.6109747936649566, "grad_norm": 0.16148284077644348, "learning_rate": 1.8133932262932678e-05, "loss": 0.4756, "step": 2739 }, { "epoch": 0.6111978585768458, "grad_norm": 0.16302482783794403, "learning_rate": 1.8132563014401497e-05, "loss": 0.5021, "step": 2740 }, { "epoch": 0.6114209234887352, "grad_norm": 0.1716173142194748, "learning_rate": 1.813119331543782e-05, "loss": 0.5303, "step": 2741 }, { "epoch": 0.6116439884006246, "grad_norm": 0.16563881933689117, "learning_rate": 1.812982316611751e-05, "loss": 0.4991, "step": 2742 }, { "epoch": 0.6118670533125139, "grad_norm": 0.1720925122499466, "learning_rate": 1.812845256651645e-05, "loss": 0.5026, "step": 2743 }, { "epoch": 0.6120901182244033, "grad_norm": 0.16051127016544342, "learning_rate": 1.8127081516710565e-05, "loss": 0.4837, "step": 2744 }, { "epoch": 0.6123131831362927, "grad_norm": 0.16201861202716827, "learning_rate": 1.8125710016775778e-05, "loss": 0.5161, "step": 2745 }, { "epoch": 0.6125362480481821, "grad_norm": 0.17014898359775543, "learning_rate": 1.812433806678806e-05, "loss": 0.5346, "step": 2746 }, { "epoch": 0.6127593129600714, "grad_norm": 0.1703520119190216, "learning_rate": 1.8122965666823398e-05, "loss": 0.5182, "step": 2747 }, { "epoch": 0.6129823778719607, "grad_norm": 0.19158673286437988, "learning_rate": 1.8121592816957797e-05, "loss": 0.5043, "step": 2748 }, { "epoch": 0.6132054427838501, "grad_norm": 0.14746567606925964, "learning_rate": 1.8120219517267302e-05, "loss": 0.4841, "step": 2749 }, { "epoch": 0.6134285076957394, "grad_norm": 0.16160626709461212, "learning_rate": 1.811884576782797e-05, "loss": 0.5158, "step": 2750 }, { "epoch": 0.6136515726076288, "grad_norm": 0.17975735664367676, "learning_rate": 1.8117471568715893e-05, "loss": 0.5235, "step": 2751 }, { "epoch": 0.6138746375195182, "grad_norm": 0.17119070887565613, "learning_rate": 1.8116096920007177e-05, "loss": 0.4951, "step": 2752 }, { "epoch": 0.6140977024314075, "grad_norm": 0.16311194002628326, "learning_rate": 1.8114721821777964e-05, "loss": 0.499, "step": 2753 }, { "epoch": 0.6143207673432969, "grad_norm": 0.16315460205078125, "learning_rate": 1.811334627410441e-05, "loss": 0.51, "step": 2754 }, { "epoch": 0.6145438322551863, "grad_norm": 0.44980496168136597, "learning_rate": 1.81119702770627e-05, "loss": 0.5005, "step": 2755 }, { "epoch": 0.6147668971670757, "grad_norm": 0.17961294949054718, "learning_rate": 1.8110593830729057e-05, "loss": 0.5204, "step": 2756 }, { "epoch": 0.614989962078965, "grad_norm": 0.16227351129055023, "learning_rate": 1.8109216935179712e-05, "loss": 0.5081, "step": 2757 }, { "epoch": 0.6152130269908543, "grad_norm": 0.1761593520641327, "learning_rate": 1.810783959049092e-05, "loss": 0.5163, "step": 2758 }, { "epoch": 0.6154360919027437, "grad_norm": 0.17095595598220825, "learning_rate": 1.810646179673897e-05, "loss": 0.4948, "step": 2759 }, { "epoch": 0.615659156814633, "grad_norm": 0.16665005683898926, "learning_rate": 1.8105083554000175e-05, "loss": 0.4937, "step": 2760 }, { "epoch": 0.6158822217265224, "grad_norm": 0.16761796176433563, "learning_rate": 1.810370486235087e-05, "loss": 0.5081, "step": 2761 }, { "epoch": 0.6161052866384118, "grad_norm": 0.17292195558547974, "learning_rate": 1.8102325721867417e-05, "loss": 0.5027, "step": 2762 }, { "epoch": 0.6163283515503012, "grad_norm": 0.1606137752532959, "learning_rate": 1.8100946132626197e-05, "loss": 0.4867, "step": 2763 }, { "epoch": 0.6165514164621905, "grad_norm": 0.1619354635477066, "learning_rate": 1.8099566094703626e-05, "loss": 0.474, "step": 2764 }, { "epoch": 0.6167744813740799, "grad_norm": 0.16491201519966125, "learning_rate": 1.8098185608176132e-05, "loss": 0.4953, "step": 2765 }, { "epoch": 0.6169975462859693, "grad_norm": 0.16283756494522095, "learning_rate": 1.8096804673120183e-05, "loss": 0.4934, "step": 2766 }, { "epoch": 0.6172206111978585, "grad_norm": 0.15283644199371338, "learning_rate": 1.809542328961226e-05, "loss": 0.4691, "step": 2767 }, { "epoch": 0.6174436761097479, "grad_norm": 0.16907426714897156, "learning_rate": 1.809404145772887e-05, "loss": 0.4901, "step": 2768 }, { "epoch": 0.6176667410216373, "grad_norm": 0.16500405967235565, "learning_rate": 1.8092659177546554e-05, "loss": 0.5042, "step": 2769 }, { "epoch": 0.6178898059335266, "grad_norm": 0.16616129875183105, "learning_rate": 1.8091276449141868e-05, "loss": 0.528, "step": 2770 }, { "epoch": 0.618112870845416, "grad_norm": 0.15396897494792938, "learning_rate": 1.8089893272591393e-05, "loss": 0.4797, "step": 2771 }, { "epoch": 0.6183359357573054, "grad_norm": 0.1576898992061615, "learning_rate": 1.8088509647971744e-05, "loss": 0.5007, "step": 2772 }, { "epoch": 0.6185590006691948, "grad_norm": 0.1703861653804779, "learning_rate": 1.808712557535955e-05, "loss": 0.5114, "step": 2773 }, { "epoch": 0.6187820655810841, "grad_norm": 0.15141689777374268, "learning_rate": 1.8085741054831472e-05, "loss": 0.457, "step": 2774 }, { "epoch": 0.6190051304929735, "grad_norm": 0.19050109386444092, "learning_rate": 1.8084356086464197e-05, "loss": 0.474, "step": 2775 }, { "epoch": 0.6192281954048628, "grad_norm": 0.17825506627559662, "learning_rate": 1.8082970670334425e-05, "loss": 0.5164, "step": 2776 }, { "epoch": 0.6194512603167521, "grad_norm": 0.16141526401042938, "learning_rate": 1.8081584806518897e-05, "loss": 0.5025, "step": 2777 }, { "epoch": 0.6196743252286415, "grad_norm": 0.23704519867897034, "learning_rate": 1.8080198495094364e-05, "loss": 0.5315, "step": 2778 }, { "epoch": 0.6198973901405309, "grad_norm": 0.15994718670845032, "learning_rate": 1.8078811736137612e-05, "loss": 0.4853, "step": 2779 }, { "epoch": 0.6201204550524203, "grad_norm": 0.1649598479270935, "learning_rate": 1.807742452972545e-05, "loss": 0.5262, "step": 2780 }, { "epoch": 0.6203435199643096, "grad_norm": 0.14894191920757294, "learning_rate": 1.8076036875934707e-05, "loss": 0.4816, "step": 2781 }, { "epoch": 0.620566584876199, "grad_norm": 0.17157211899757385, "learning_rate": 1.807464877484224e-05, "loss": 0.4847, "step": 2782 }, { "epoch": 0.6207896497880884, "grad_norm": 0.1663542538881302, "learning_rate": 1.8073260226524937e-05, "loss": 0.4931, "step": 2783 }, { "epoch": 0.6210127146999777, "grad_norm": 0.1610024869441986, "learning_rate": 1.8071871231059695e-05, "loss": 0.4924, "step": 2784 }, { "epoch": 0.621235779611867, "grad_norm": 0.16384142637252808, "learning_rate": 1.807048178852345e-05, "loss": 0.5143, "step": 2785 }, { "epoch": 0.6214588445237564, "grad_norm": 0.16362221539020538, "learning_rate": 1.8069091898993162e-05, "loss": 0.5172, "step": 2786 }, { "epoch": 0.6216819094356457, "grad_norm": 0.19173026084899902, "learning_rate": 1.8067701562545808e-05, "loss": 0.512, "step": 2787 }, { "epoch": 0.6219049743475351, "grad_norm": 0.15911833941936493, "learning_rate": 1.8066310779258393e-05, "loss": 0.4874, "step": 2788 }, { "epoch": 0.6221280392594245, "grad_norm": 0.15391753613948822, "learning_rate": 1.8064919549207946e-05, "loss": 0.4966, "step": 2789 }, { "epoch": 0.6223511041713139, "grad_norm": 0.21308940649032593, "learning_rate": 1.8063527872471523e-05, "loss": 0.5172, "step": 2790 }, { "epoch": 0.6225741690832032, "grad_norm": 0.15762938559055328, "learning_rate": 1.8062135749126208e-05, "loss": 0.4904, "step": 2791 }, { "epoch": 0.6227972339950926, "grad_norm": 0.18538956344127655, "learning_rate": 1.80607431792491e-05, "loss": 0.4907, "step": 2792 }, { "epoch": 0.623020298906982, "grad_norm": 0.1694374829530716, "learning_rate": 1.8059350162917333e-05, "loss": 0.4892, "step": 2793 }, { "epoch": 0.6232433638188712, "grad_norm": 0.1632799506187439, "learning_rate": 1.8057956700208055e-05, "loss": 0.531, "step": 2794 }, { "epoch": 0.6234664287307606, "grad_norm": 0.19030988216400146, "learning_rate": 1.805656279119845e-05, "loss": 0.4801, "step": 2795 }, { "epoch": 0.62368949364265, "grad_norm": 0.17578838765621185, "learning_rate": 1.8055168435965722e-05, "loss": 0.5129, "step": 2796 }, { "epoch": 0.6239125585545394, "grad_norm": 0.17337745428085327, "learning_rate": 1.8053773634587095e-05, "loss": 0.5252, "step": 2797 }, { "epoch": 0.6241356234664287, "grad_norm": 0.16400404274463654, "learning_rate": 1.8052378387139827e-05, "loss": 0.4977, "step": 2798 }, { "epoch": 0.6243586883783181, "grad_norm": 0.16884362697601318, "learning_rate": 1.8050982693701188e-05, "loss": 0.5209, "step": 2799 }, { "epoch": 0.6245817532902075, "grad_norm": 0.16655333340168, "learning_rate": 1.8049586554348487e-05, "loss": 0.5236, "step": 2800 }, { "epoch": 0.6248048182020968, "grad_norm": 0.1815658062696457, "learning_rate": 1.804818996915905e-05, "loss": 0.5155, "step": 2801 }, { "epoch": 0.6250278831139862, "grad_norm": 0.1637655794620514, "learning_rate": 1.8046792938210226e-05, "loss": 0.5108, "step": 2802 }, { "epoch": 0.6252509480258756, "grad_norm": 0.16214872896671295, "learning_rate": 1.804539546157939e-05, "loss": 0.5015, "step": 2803 }, { "epoch": 0.625474012937765, "grad_norm": 0.1624768078327179, "learning_rate": 1.804399753934395e-05, "loss": 0.4852, "step": 2804 }, { "epoch": 0.6256970778496542, "grad_norm": 0.1509588062763214, "learning_rate": 1.8042599171581322e-05, "loss": 0.4895, "step": 2805 }, { "epoch": 0.6259201427615436, "grad_norm": 0.15983150899410248, "learning_rate": 1.804120035836897e-05, "loss": 0.5066, "step": 2806 }, { "epoch": 0.626143207673433, "grad_norm": 0.1639653593301773, "learning_rate": 1.8039801099784356e-05, "loss": 0.5081, "step": 2807 }, { "epoch": 0.6263662725853223, "grad_norm": 0.16054677963256836, "learning_rate": 1.8038401395904984e-05, "loss": 0.5019, "step": 2808 }, { "epoch": 0.6265893374972117, "grad_norm": 0.16557928919792175, "learning_rate": 1.8037001246808382e-05, "loss": 0.5085, "step": 2809 }, { "epoch": 0.6268124024091011, "grad_norm": 0.17172253131866455, "learning_rate": 1.8035600652572093e-05, "loss": 0.5069, "step": 2810 }, { "epoch": 0.6270354673209904, "grad_norm": 0.16289275884628296, "learning_rate": 1.80341996132737e-05, "loss": 0.5054, "step": 2811 }, { "epoch": 0.6272585322328798, "grad_norm": 0.159224271774292, "learning_rate": 1.8032798128990788e-05, "loss": 0.4884, "step": 2812 }, { "epoch": 0.6274815971447691, "grad_norm": 0.1516553908586502, "learning_rate": 1.803139619980099e-05, "loss": 0.4823, "step": 2813 }, { "epoch": 0.6277046620566585, "grad_norm": 0.164667546749115, "learning_rate": 1.802999382578195e-05, "loss": 0.5184, "step": 2814 }, { "epoch": 0.6279277269685478, "grad_norm": 0.17175255715847015, "learning_rate": 1.8028591007011343e-05, "loss": 0.5166, "step": 2815 }, { "epoch": 0.6281507918804372, "grad_norm": 0.18747776746749878, "learning_rate": 1.8027187743566867e-05, "loss": 0.499, "step": 2816 }, { "epoch": 0.6283738567923266, "grad_norm": 0.16748382151126862, "learning_rate": 1.8025784035526235e-05, "loss": 0.5099, "step": 2817 }, { "epoch": 0.6285969217042159, "grad_norm": 0.19813144207000732, "learning_rate": 1.80243798829672e-05, "loss": 0.5045, "step": 2818 }, { "epoch": 0.6288199866161053, "grad_norm": 0.16501356661319733, "learning_rate": 1.8022975285967534e-05, "loss": 0.514, "step": 2819 }, { "epoch": 0.6290430515279947, "grad_norm": 0.17181000113487244, "learning_rate": 1.8021570244605028e-05, "loss": 0.4963, "step": 2820 }, { "epoch": 0.6292661164398841, "grad_norm": 0.16481012105941772, "learning_rate": 1.8020164758957505e-05, "loss": 0.4831, "step": 2821 }, { "epoch": 0.6294891813517733, "grad_norm": 0.1659911572933197, "learning_rate": 1.8018758829102808e-05, "loss": 0.5026, "step": 2822 }, { "epoch": 0.6297122462636627, "grad_norm": 0.18394367396831512, "learning_rate": 1.8017352455118812e-05, "loss": 0.5072, "step": 2823 }, { "epoch": 0.6299353111755521, "grad_norm": 0.1666680872440338, "learning_rate": 1.80159456370834e-05, "loss": 0.4986, "step": 2824 }, { "epoch": 0.6301583760874414, "grad_norm": 0.184769406914711, "learning_rate": 1.80145383750745e-05, "loss": 0.4863, "step": 2825 }, { "epoch": 0.6303814409993308, "grad_norm": 0.15709228813648224, "learning_rate": 1.801313066917005e-05, "loss": 0.4945, "step": 2826 }, { "epoch": 0.6306045059112202, "grad_norm": 0.16040046513080597, "learning_rate": 1.801172251944802e-05, "loss": 0.4704, "step": 2827 }, { "epoch": 0.6308275708231095, "grad_norm": 0.174714133143425, "learning_rate": 1.8010313925986398e-05, "loss": 0.5224, "step": 2828 }, { "epoch": 0.6310506357349989, "grad_norm": 0.17879654467105865, "learning_rate": 1.8008904888863206e-05, "loss": 0.5168, "step": 2829 }, { "epoch": 0.6312737006468883, "grad_norm": 0.16159847378730774, "learning_rate": 1.8007495408156483e-05, "loss": 0.4905, "step": 2830 }, { "epoch": 0.6314967655587777, "grad_norm": 0.16279365122318268, "learning_rate": 1.8006085483944295e-05, "loss": 0.5004, "step": 2831 }, { "epoch": 0.6317198304706669, "grad_norm": 0.16124996542930603, "learning_rate": 1.800467511630473e-05, "loss": 0.498, "step": 2832 }, { "epoch": 0.6319428953825563, "grad_norm": 0.16358453035354614, "learning_rate": 1.800326430531591e-05, "loss": 0.5172, "step": 2833 }, { "epoch": 0.6321659602944457, "grad_norm": 0.16300733387470245, "learning_rate": 1.8001853051055967e-05, "loss": 0.5024, "step": 2834 }, { "epoch": 0.632389025206335, "grad_norm": 0.1758897304534912, "learning_rate": 1.8000441353603072e-05, "loss": 0.4946, "step": 2835 }, { "epoch": 0.6326120901182244, "grad_norm": 0.1550527960062027, "learning_rate": 1.7999029213035408e-05, "loss": 0.5027, "step": 2836 }, { "epoch": 0.6328351550301138, "grad_norm": 0.15276572108268738, "learning_rate": 1.799761662943119e-05, "loss": 0.4968, "step": 2837 }, { "epoch": 0.6330582199420032, "grad_norm": 0.16600914299488068, "learning_rate": 1.7996203602868657e-05, "loss": 0.5278, "step": 2838 }, { "epoch": 0.6332812848538925, "grad_norm": 0.16788353025913239, "learning_rate": 1.799479013342607e-05, "loss": 0.5071, "step": 2839 }, { "epoch": 0.6335043497657818, "grad_norm": 0.16952337324619293, "learning_rate": 1.7993376221181716e-05, "loss": 0.4852, "step": 2840 }, { "epoch": 0.6337274146776712, "grad_norm": 0.28554677963256836, "learning_rate": 1.7991961866213907e-05, "loss": 0.474, "step": 2841 }, { "epoch": 0.6339504795895605, "grad_norm": 0.17595677077770233, "learning_rate": 1.7990547068600977e-05, "loss": 0.5045, "step": 2842 }, { "epoch": 0.6341735445014499, "grad_norm": 0.1631106734275818, "learning_rate": 1.798913182842129e-05, "loss": 0.4943, "step": 2843 }, { "epoch": 0.6343966094133393, "grad_norm": 0.1630406528711319, "learning_rate": 1.7987716145753226e-05, "loss": 0.5095, "step": 2844 }, { "epoch": 0.6346196743252286, "grad_norm": 0.17511357367038727, "learning_rate": 1.7986300020675198e-05, "loss": 0.5203, "step": 2845 }, { "epoch": 0.634842739237118, "grad_norm": 0.1971133053302765, "learning_rate": 1.798488345326564e-05, "loss": 0.5061, "step": 2846 }, { "epoch": 0.6350658041490074, "grad_norm": 0.15673868358135223, "learning_rate": 1.7983466443603008e-05, "loss": 0.4957, "step": 2847 }, { "epoch": 0.6352888690608968, "grad_norm": 0.1608821600675583, "learning_rate": 1.798204899176579e-05, "loss": 0.4848, "step": 2848 }, { "epoch": 0.635511933972786, "grad_norm": 0.15686407685279846, "learning_rate": 1.7980631097832485e-05, "loss": 0.4986, "step": 2849 }, { "epoch": 0.6357349988846754, "grad_norm": 0.16254571080207825, "learning_rate": 1.797921276188163e-05, "loss": 0.5255, "step": 2850 }, { "epoch": 0.6359580637965648, "grad_norm": 0.17136751115322113, "learning_rate": 1.7977793983991785e-05, "loss": 0.5029, "step": 2851 }, { "epoch": 0.6361811287084541, "grad_norm": 0.2001720815896988, "learning_rate": 1.7976374764241523e-05, "loss": 0.5208, "step": 2852 }, { "epoch": 0.6364041936203435, "grad_norm": 0.15888839960098267, "learning_rate": 1.7974955102709457e-05, "loss": 0.5125, "step": 2853 }, { "epoch": 0.6366272585322329, "grad_norm": 0.1562851518392563, "learning_rate": 1.797353499947421e-05, "loss": 0.4647, "step": 2854 }, { "epoch": 0.6368503234441223, "grad_norm": 0.157870814204216, "learning_rate": 1.7972114454614436e-05, "loss": 0.5072, "step": 2855 }, { "epoch": 0.6370733883560116, "grad_norm": 0.16168496012687683, "learning_rate": 1.7970693468208823e-05, "loss": 0.4946, "step": 2856 }, { "epoch": 0.637296453267901, "grad_norm": 0.23463092744350433, "learning_rate": 1.796927204033607e-05, "loss": 0.5073, "step": 2857 }, { "epoch": 0.6375195181797904, "grad_norm": 0.18455378711223602, "learning_rate": 1.7967850171074896e-05, "loss": 0.5278, "step": 2858 }, { "epoch": 0.6377425830916796, "grad_norm": 0.16380822658538818, "learning_rate": 1.796642786050406e-05, "loss": 0.5048, "step": 2859 }, { "epoch": 0.637965648003569, "grad_norm": 0.15988902747631073, "learning_rate": 1.7965005108702342e-05, "loss": 0.4787, "step": 2860 }, { "epoch": 0.6381887129154584, "grad_norm": 0.16252587735652924, "learning_rate": 1.796358191574854e-05, "loss": 0.5075, "step": 2861 }, { "epoch": 0.6384117778273477, "grad_norm": 0.1922154724597931, "learning_rate": 1.7962158281721475e-05, "loss": 0.4616, "step": 2862 }, { "epoch": 0.6386348427392371, "grad_norm": 0.17007192969322205, "learning_rate": 1.7960734206700002e-05, "loss": 0.4741, "step": 2863 }, { "epoch": 0.6388579076511265, "grad_norm": 0.16017115116119385, "learning_rate": 1.7959309690762992e-05, "loss": 0.4757, "step": 2864 }, { "epoch": 0.6390809725630159, "grad_norm": 0.16466043889522552, "learning_rate": 1.795788473398935e-05, "loss": 0.4856, "step": 2865 }, { "epoch": 0.6393040374749052, "grad_norm": 0.16355857253074646, "learning_rate": 1.795645933645799e-05, "loss": 0.5055, "step": 2866 }, { "epoch": 0.6395271023867946, "grad_norm": 0.1593533307313919, "learning_rate": 1.7955033498247863e-05, "loss": 0.5103, "step": 2867 }, { "epoch": 0.639750167298684, "grad_norm": 0.16536985337734222, "learning_rate": 1.7953607219437942e-05, "loss": 0.5029, "step": 2868 }, { "epoch": 0.6399732322105732, "grad_norm": 0.1590854823589325, "learning_rate": 1.7952180500107225e-05, "loss": 0.4687, "step": 2869 }, { "epoch": 0.6401962971224626, "grad_norm": 0.16659674048423767, "learning_rate": 1.7950753340334734e-05, "loss": 0.4999, "step": 2870 }, { "epoch": 0.640419362034352, "grad_norm": 0.17274489998817444, "learning_rate": 1.7949325740199507e-05, "loss": 0.4995, "step": 2871 }, { "epoch": 0.6406424269462414, "grad_norm": 0.16115543246269226, "learning_rate": 1.7947897699780616e-05, "loss": 0.484, "step": 2872 }, { "epoch": 0.6408654918581307, "grad_norm": 0.16735753417015076, "learning_rate": 1.7946469219157158e-05, "loss": 0.4917, "step": 2873 }, { "epoch": 0.6410885567700201, "grad_norm": 0.1667783111333847, "learning_rate": 1.7945040298408248e-05, "loss": 0.5106, "step": 2874 }, { "epoch": 0.6413116216819095, "grad_norm": 0.16274797916412354, "learning_rate": 1.794361093761303e-05, "loss": 0.5006, "step": 2875 }, { "epoch": 0.6415346865937988, "grad_norm": 0.15851566195487976, "learning_rate": 1.7942181136850672e-05, "loss": 0.5007, "step": 2876 }, { "epoch": 0.6417577515056881, "grad_norm": 0.16172711551189423, "learning_rate": 1.7940750896200363e-05, "loss": 0.5043, "step": 2877 }, { "epoch": 0.6419808164175775, "grad_norm": 0.18239612877368927, "learning_rate": 1.7939320215741322e-05, "loss": 0.4888, "step": 2878 }, { "epoch": 0.6422038813294669, "grad_norm": 0.17973902821540833, "learning_rate": 1.7937889095552787e-05, "loss": 0.4813, "step": 2879 }, { "epoch": 0.6424269462413562, "grad_norm": 0.1610574722290039, "learning_rate": 1.7936457535714023e-05, "loss": 0.5138, "step": 2880 }, { "epoch": 0.6426500111532456, "grad_norm": 0.16427603363990784, "learning_rate": 1.7935025536304317e-05, "loss": 0.483, "step": 2881 }, { "epoch": 0.642873076065135, "grad_norm": 0.18190504610538483, "learning_rate": 1.7933593097402983e-05, "loss": 0.4968, "step": 2882 }, { "epoch": 0.6430961409770243, "grad_norm": 0.16263769567012787, "learning_rate": 1.793216021908936e-05, "loss": 0.5245, "step": 2883 }, { "epoch": 0.6433192058889137, "grad_norm": 0.16960637271404266, "learning_rate": 1.793072690144281e-05, "loss": 0.5353, "step": 2884 }, { "epoch": 0.6435422708008031, "grad_norm": 0.1611735224723816, "learning_rate": 1.7929293144542715e-05, "loss": 0.4701, "step": 2885 }, { "epoch": 0.6437653357126923, "grad_norm": 0.16312386095523834, "learning_rate": 1.792785894846849e-05, "loss": 0.5167, "step": 2886 }, { "epoch": 0.6439884006245817, "grad_norm": 0.17161010205745697, "learning_rate": 1.7926424313299568e-05, "loss": 0.5092, "step": 2887 }, { "epoch": 0.6442114655364711, "grad_norm": 0.1609477698802948, "learning_rate": 1.7924989239115407e-05, "loss": 0.5295, "step": 2888 }, { "epoch": 0.6444345304483605, "grad_norm": 0.1607387214899063, "learning_rate": 1.7923553725995494e-05, "loss": 0.5205, "step": 2889 }, { "epoch": 0.6446575953602498, "grad_norm": 0.16367650032043457, "learning_rate": 1.7922117774019333e-05, "loss": 0.4875, "step": 2890 }, { "epoch": 0.6448806602721392, "grad_norm": 0.17014965415000916, "learning_rate": 1.7920681383266458e-05, "loss": 0.5218, "step": 2891 }, { "epoch": 0.6451037251840286, "grad_norm": 0.1545102298259735, "learning_rate": 1.7919244553816426e-05, "loss": 0.4954, "step": 2892 }, { "epoch": 0.6453267900959179, "grad_norm": 0.2512567937374115, "learning_rate": 1.7917807285748817e-05, "loss": 0.5173, "step": 2893 }, { "epoch": 0.6455498550078073, "grad_norm": 0.17359793186187744, "learning_rate": 1.7916369579143235e-05, "loss": 0.5253, "step": 2894 }, { "epoch": 0.6457729199196967, "grad_norm": 0.17602702975273132, "learning_rate": 1.7914931434079305e-05, "loss": 0.492, "step": 2895 }, { "epoch": 0.645995984831586, "grad_norm": 0.1598963886499405, "learning_rate": 1.791349285063669e-05, "loss": 0.5071, "step": 2896 }, { "epoch": 0.6462190497434753, "grad_norm": 0.16161389648914337, "learning_rate": 1.7912053828895064e-05, "loss": 0.4896, "step": 2897 }, { "epoch": 0.6464421146553647, "grad_norm": 0.16500675678253174, "learning_rate": 1.7910614368934127e-05, "loss": 0.5142, "step": 2898 }, { "epoch": 0.6466651795672541, "grad_norm": 0.15490639209747314, "learning_rate": 1.7909174470833604e-05, "loss": 0.4708, "step": 2899 }, { "epoch": 0.6468882444791434, "grad_norm": 0.1680765300989151, "learning_rate": 1.7907734134673252e-05, "loss": 0.4978, "step": 2900 }, { "epoch": 0.6471113093910328, "grad_norm": 0.16419318318367004, "learning_rate": 1.790629336053284e-05, "loss": 0.5106, "step": 2901 }, { "epoch": 0.6473343743029222, "grad_norm": 0.1587122082710266, "learning_rate": 1.790485214849217e-05, "loss": 0.456, "step": 2902 }, { "epoch": 0.6475574392148115, "grad_norm": 0.17150086164474487, "learning_rate": 1.7903410498631063e-05, "loss": 0.5066, "step": 2903 }, { "epoch": 0.6477805041267009, "grad_norm": 0.16548244655132294, "learning_rate": 1.790196841102937e-05, "loss": 0.5297, "step": 2904 }, { "epoch": 0.6480035690385902, "grad_norm": 0.1526126116514206, "learning_rate": 1.790052588576696e-05, "loss": 0.4693, "step": 2905 }, { "epoch": 0.6482266339504796, "grad_norm": 0.16352921724319458, "learning_rate": 1.7899082922923732e-05, "loss": 0.5291, "step": 2906 }, { "epoch": 0.6484496988623689, "grad_norm": 0.16485454142093658, "learning_rate": 1.78976395225796e-05, "loss": 0.4752, "step": 2907 }, { "epoch": 0.6486727637742583, "grad_norm": 0.1610778123140335, "learning_rate": 1.7896195684814516e-05, "loss": 0.479, "step": 2908 }, { "epoch": 0.6488958286861477, "grad_norm": 0.16191574931144714, "learning_rate": 1.7894751409708447e-05, "loss": 0.5059, "step": 2909 }, { "epoch": 0.649118893598037, "grad_norm": 0.1598249077796936, "learning_rate": 1.7893306697341385e-05, "loss": 0.487, "step": 2910 }, { "epoch": 0.6493419585099264, "grad_norm": 0.1673557460308075, "learning_rate": 1.7891861547793345e-05, "loss": 0.4926, "step": 2911 }, { "epoch": 0.6495650234218158, "grad_norm": 0.16955086588859558, "learning_rate": 1.789041596114437e-05, "loss": 0.4912, "step": 2912 }, { "epoch": 0.6497880883337052, "grad_norm": 0.15351925790309906, "learning_rate": 1.788896993747453e-05, "loss": 0.4758, "step": 2913 }, { "epoch": 0.6500111532455944, "grad_norm": 0.16994906961917877, "learning_rate": 1.7887523476863907e-05, "loss": 0.5038, "step": 2914 }, { "epoch": 0.6502342181574838, "grad_norm": 0.16765102744102478, "learning_rate": 1.7886076579392622e-05, "loss": 0.4787, "step": 2915 }, { "epoch": 0.6504572830693732, "grad_norm": 0.16875889897346497, "learning_rate": 1.7884629245140812e-05, "loss": 0.5267, "step": 2916 }, { "epoch": 0.6506803479812625, "grad_norm": 0.16783274710178375, "learning_rate": 1.7883181474188637e-05, "loss": 0.4913, "step": 2917 }, { "epoch": 0.6509034128931519, "grad_norm": 0.16633883118629456, "learning_rate": 1.7881733266616284e-05, "loss": 0.5019, "step": 2918 }, { "epoch": 0.6511264778050413, "grad_norm": 0.1654990315437317, "learning_rate": 1.7880284622503966e-05, "loss": 0.5033, "step": 2919 }, { "epoch": 0.6513495427169306, "grad_norm": 0.16043652594089508, "learning_rate": 1.7878835541931915e-05, "loss": 0.4957, "step": 2920 }, { "epoch": 0.65157260762882, "grad_norm": 0.16924645006656647, "learning_rate": 1.7877386024980392e-05, "loss": 0.5223, "step": 2921 }, { "epoch": 0.6517956725407094, "grad_norm": 0.1635693907737732, "learning_rate": 1.7875936071729682e-05, "loss": 0.4908, "step": 2922 }, { "epoch": 0.6520187374525988, "grad_norm": 0.1630011945962906, "learning_rate": 1.7874485682260087e-05, "loss": 0.5138, "step": 2923 }, { "epoch": 0.652241802364488, "grad_norm": 0.16332125663757324, "learning_rate": 1.7873034856651944e-05, "loss": 0.5119, "step": 2924 }, { "epoch": 0.6524648672763774, "grad_norm": 0.1802121102809906, "learning_rate": 1.787158359498561e-05, "loss": 0.4766, "step": 2925 }, { "epoch": 0.6526879321882668, "grad_norm": 0.16062189638614655, "learning_rate": 1.7870131897341458e-05, "loss": 0.4747, "step": 2926 }, { "epoch": 0.6529109971001561, "grad_norm": 0.16367454826831818, "learning_rate": 1.7868679763799898e-05, "loss": 0.4698, "step": 2927 }, { "epoch": 0.6531340620120455, "grad_norm": 0.16663846373558044, "learning_rate": 1.786722719444136e-05, "loss": 0.4903, "step": 2928 }, { "epoch": 0.6533571269239349, "grad_norm": 0.16041219234466553, "learning_rate": 1.786577418934629e-05, "loss": 0.496, "step": 2929 }, { "epoch": 0.6535801918358243, "grad_norm": 0.1660057008266449, "learning_rate": 1.7864320748595168e-05, "loss": 0.497, "step": 2930 }, { "epoch": 0.6538032567477136, "grad_norm": 0.17890667915344238, "learning_rate": 1.7862866872268493e-05, "loss": 0.5183, "step": 2931 }, { "epoch": 0.654026321659603, "grad_norm": 0.15913154184818268, "learning_rate": 1.7861412560446794e-05, "loss": 0.4827, "step": 2932 }, { "epoch": 0.6542493865714923, "grad_norm": 0.16374540328979492, "learning_rate": 1.7859957813210614e-05, "loss": 0.5119, "step": 2933 }, { "epoch": 0.6544724514833816, "grad_norm": 0.1748073399066925, "learning_rate": 1.7858502630640533e-05, "loss": 0.493, "step": 2934 }, { "epoch": 0.654695516395271, "grad_norm": 0.1679995357990265, "learning_rate": 1.7857047012817144e-05, "loss": 0.4878, "step": 2935 }, { "epoch": 0.6549185813071604, "grad_norm": 0.16782502830028534, "learning_rate": 1.7855590959821068e-05, "loss": 0.4942, "step": 2936 }, { "epoch": 0.6551416462190497, "grad_norm": 0.1666112244129181, "learning_rate": 1.785413447173295e-05, "loss": 0.5109, "step": 2937 }, { "epoch": 0.6553647111309391, "grad_norm": 0.16338278353214264, "learning_rate": 1.785267754863346e-05, "loss": 0.509, "step": 2938 }, { "epoch": 0.6555877760428285, "grad_norm": 0.16831496357917786, "learning_rate": 1.7851220190603295e-05, "loss": 0.4933, "step": 2939 }, { "epoch": 0.6558108409547179, "grad_norm": 0.16522802412509918, "learning_rate": 1.7849762397723168e-05, "loss": 0.5204, "step": 2940 }, { "epoch": 0.6560339058666071, "grad_norm": 0.1579723209142685, "learning_rate": 1.7848304170073822e-05, "loss": 0.4747, "step": 2941 }, { "epoch": 0.6562569707784965, "grad_norm": 0.16369852423667908, "learning_rate": 1.784684550773602e-05, "loss": 0.4763, "step": 2942 }, { "epoch": 0.6564800356903859, "grad_norm": 0.16621820628643036, "learning_rate": 1.7845386410790558e-05, "loss": 0.5113, "step": 2943 }, { "epoch": 0.6567031006022752, "grad_norm": 0.161569282412529, "learning_rate": 1.784392687931825e-05, "loss": 0.4899, "step": 2944 }, { "epoch": 0.6569261655141646, "grad_norm": 0.1616351306438446, "learning_rate": 1.7842466913399928e-05, "loss": 0.4879, "step": 2945 }, { "epoch": 0.657149230426054, "grad_norm": 0.16426199674606323, "learning_rate": 1.7841006513116456e-05, "loss": 0.5265, "step": 2946 }, { "epoch": 0.6573722953379434, "grad_norm": 0.1592702567577362, "learning_rate": 1.7839545678548727e-05, "loss": 0.5036, "step": 2947 }, { "epoch": 0.6575953602498327, "grad_norm": 0.15806902945041656, "learning_rate": 1.7838084409777637e-05, "loss": 0.4954, "step": 2948 }, { "epoch": 0.6578184251617221, "grad_norm": 0.17478401958942413, "learning_rate": 1.7836622706884138e-05, "loss": 0.5046, "step": 2949 }, { "epoch": 0.6580414900736115, "grad_norm": 0.16117317974567413, "learning_rate": 1.7835160569949174e-05, "loss": 0.4925, "step": 2950 }, { "epoch": 0.6582645549855007, "grad_norm": 0.1660463809967041, "learning_rate": 1.783369799905373e-05, "loss": 0.5003, "step": 2951 }, { "epoch": 0.6584876198973901, "grad_norm": 0.1700252741575241, "learning_rate": 1.7832234994278822e-05, "loss": 0.5169, "step": 2952 }, { "epoch": 0.6587106848092795, "grad_norm": 0.17790904641151428, "learning_rate": 1.7830771555705468e-05, "loss": 0.5088, "step": 2953 }, { "epoch": 0.6589337497211689, "grad_norm": 0.16493864357471466, "learning_rate": 1.782930768341473e-05, "loss": 0.5082, "step": 2954 }, { "epoch": 0.6591568146330582, "grad_norm": 0.16437609493732452, "learning_rate": 1.7827843377487683e-05, "loss": 0.4984, "step": 2955 }, { "epoch": 0.6593798795449476, "grad_norm": 0.16439390182495117, "learning_rate": 1.7826378638005432e-05, "loss": 0.5005, "step": 2956 }, { "epoch": 0.659602944456837, "grad_norm": 0.15785926580429077, "learning_rate": 1.78249134650491e-05, "loss": 0.5005, "step": 2957 }, { "epoch": 0.6598260093687263, "grad_norm": 0.20435695350170135, "learning_rate": 1.782344785869984e-05, "loss": 0.4711, "step": 2958 }, { "epoch": 0.6600490742806157, "grad_norm": 0.15359720587730408, "learning_rate": 1.7821981819038828e-05, "loss": 0.4738, "step": 2959 }, { "epoch": 0.660272139192505, "grad_norm": 0.17044633626937866, "learning_rate": 1.7820515346147262e-05, "loss": 0.4951, "step": 2960 }, { "epoch": 0.6604952041043943, "grad_norm": 0.17282849550247192, "learning_rate": 1.781904844010636e-05, "loss": 0.496, "step": 2961 }, { "epoch": 0.6607182690162837, "grad_norm": 0.1486678570508957, "learning_rate": 1.7817581100997374e-05, "loss": 0.4671, "step": 2962 }, { "epoch": 0.6609413339281731, "grad_norm": 0.16091682016849518, "learning_rate": 1.781611332890157e-05, "loss": 0.4824, "step": 2963 }, { "epoch": 0.6611643988400625, "grad_norm": 0.15881048142910004, "learning_rate": 1.7814645123900246e-05, "loss": 0.4864, "step": 2964 }, { "epoch": 0.6613874637519518, "grad_norm": 0.17700685560703278, "learning_rate": 1.781317648607472e-05, "loss": 0.4962, "step": 2965 }, { "epoch": 0.6616105286638412, "grad_norm": 0.15473395586013794, "learning_rate": 1.781170741550633e-05, "loss": 0.4737, "step": 2966 }, { "epoch": 0.6618335935757306, "grad_norm": 0.1670142114162445, "learning_rate": 1.781023791227645e-05, "loss": 0.5105, "step": 2967 }, { "epoch": 0.6620566584876199, "grad_norm": 0.18345218896865845, "learning_rate": 1.780876797646646e-05, "loss": 0.5086, "step": 2968 }, { "epoch": 0.6622797233995092, "grad_norm": 0.16229334473609924, "learning_rate": 1.7807297608157784e-05, "loss": 0.5007, "step": 2969 }, { "epoch": 0.6625027883113986, "grad_norm": 0.16335418820381165, "learning_rate": 1.7805826807431856e-05, "loss": 0.4976, "step": 2970 }, { "epoch": 0.662725853223288, "grad_norm": 0.16784057021141052, "learning_rate": 1.780435557437014e-05, "loss": 0.5106, "step": 2971 }, { "epoch": 0.6629489181351773, "grad_norm": 0.16552112996578217, "learning_rate": 1.7802883909054118e-05, "loss": 0.5087, "step": 2972 }, { "epoch": 0.6631719830470667, "grad_norm": 0.206945538520813, "learning_rate": 1.7801411811565308e-05, "loss": 0.4753, "step": 2973 }, { "epoch": 0.6633950479589561, "grad_norm": 0.1575002670288086, "learning_rate": 1.7799939281985236e-05, "loss": 0.4875, "step": 2974 }, { "epoch": 0.6636181128708454, "grad_norm": 0.1623086929321289, "learning_rate": 1.7798466320395463e-05, "loss": 0.4933, "step": 2975 }, { "epoch": 0.6638411777827348, "grad_norm": 0.1668468862771988, "learning_rate": 1.779699292687757e-05, "loss": 0.462, "step": 2976 }, { "epoch": 0.6640642426946242, "grad_norm": 0.43826133012771606, "learning_rate": 1.7795519101513166e-05, "loss": 0.5073, "step": 2977 }, { "epoch": 0.6642873076065134, "grad_norm": 0.18710243701934814, "learning_rate": 1.779404484438388e-05, "loss": 0.5103, "step": 2978 }, { "epoch": 0.6645103725184028, "grad_norm": 0.17884770035743713, "learning_rate": 1.7792570155571358e-05, "loss": 0.5219, "step": 2979 }, { "epoch": 0.6647334374302922, "grad_norm": 0.1652306616306305, "learning_rate": 1.7791095035157288e-05, "loss": 0.513, "step": 2980 }, { "epoch": 0.6649565023421816, "grad_norm": 0.175328329205513, "learning_rate": 1.7789619483223367e-05, "loss": 0.5064, "step": 2981 }, { "epoch": 0.6651795672540709, "grad_norm": 0.15750913321971893, "learning_rate": 1.7788143499851318e-05, "loss": 0.4608, "step": 2982 }, { "epoch": 0.6654026321659603, "grad_norm": 0.15582314133644104, "learning_rate": 1.7786667085122895e-05, "loss": 0.4793, "step": 2983 }, { "epoch": 0.6656256970778497, "grad_norm": 0.18893486261367798, "learning_rate": 1.7785190239119864e-05, "loss": 0.5277, "step": 2984 }, { "epoch": 0.665848761989739, "grad_norm": 0.16801688075065613, "learning_rate": 1.7783712961924032e-05, "loss": 0.5281, "step": 2985 }, { "epoch": 0.6660718269016284, "grad_norm": 0.16510803997516632, "learning_rate": 1.778223525361721e-05, "loss": 0.4675, "step": 2986 }, { "epoch": 0.6662948918135178, "grad_norm": 0.16860422492027283, "learning_rate": 1.778075711428125e-05, "loss": 0.4712, "step": 2987 }, { "epoch": 0.6665179567254071, "grad_norm": 0.17091190814971924, "learning_rate": 1.777927854399802e-05, "loss": 0.5208, "step": 2988 }, { "epoch": 0.6667410216372964, "grad_norm": 0.19528479874134064, "learning_rate": 1.7777799542849408e-05, "loss": 0.482, "step": 2989 }, { "epoch": 0.6669640865491858, "grad_norm": 0.16672208905220032, "learning_rate": 1.7776320110917334e-05, "loss": 0.5002, "step": 2990 }, { "epoch": 0.6671871514610752, "grad_norm": 0.1595972180366516, "learning_rate": 1.777484024828374e-05, "loss": 0.4868, "step": 2991 }, { "epoch": 0.6674102163729645, "grad_norm": 0.16304026544094086, "learning_rate": 1.7773359955030583e-05, "loss": 0.4839, "step": 2992 }, { "epoch": 0.6676332812848539, "grad_norm": 0.17252400517463684, "learning_rate": 1.7771879231239857e-05, "loss": 0.4737, "step": 2993 }, { "epoch": 0.6678563461967433, "grad_norm": 0.17409226298332214, "learning_rate": 1.777039807699357e-05, "loss": 0.5152, "step": 2994 }, { "epoch": 0.6680794111086326, "grad_norm": 0.15897680819034576, "learning_rate": 1.7768916492373763e-05, "loss": 0.5081, "step": 2995 }, { "epoch": 0.668302476020522, "grad_norm": 0.15727804601192474, "learning_rate": 1.7767434477462493e-05, "loss": 0.4902, "step": 2996 }, { "epoch": 0.6685255409324113, "grad_norm": 0.16594363749027252, "learning_rate": 1.776595203234184e-05, "loss": 0.4982, "step": 2997 }, { "epoch": 0.6687486058443007, "grad_norm": 0.16226732730865479, "learning_rate": 1.7764469157093916e-05, "loss": 0.5304, "step": 2998 }, { "epoch": 0.66897167075619, "grad_norm": 0.15772786736488342, "learning_rate": 1.7762985851800846e-05, "loss": 0.4707, "step": 2999 }, { "epoch": 0.6691947356680794, "grad_norm": 0.19541241228580475, "learning_rate": 1.776150211654479e-05, "loss": 0.4826, "step": 3000 }, { "epoch": 0.6694178005799688, "grad_norm": 0.17381907999515533, "learning_rate": 1.7760017951407924e-05, "loss": 0.5304, "step": 3001 }, { "epoch": 0.6696408654918581, "grad_norm": 0.16170580685138702, "learning_rate": 1.7758533356472454e-05, "loss": 0.4722, "step": 3002 }, { "epoch": 0.6698639304037475, "grad_norm": 0.16046100854873657, "learning_rate": 1.7757048331820604e-05, "loss": 0.4852, "step": 3003 }, { "epoch": 0.6700869953156369, "grad_norm": 0.17672637104988098, "learning_rate": 1.775556287753462e-05, "loss": 0.5, "step": 3004 }, { "epoch": 0.6703100602275263, "grad_norm": 0.15894585847854614, "learning_rate": 1.7754076993696784e-05, "loss": 0.4819, "step": 3005 }, { "epoch": 0.6705331251394155, "grad_norm": 0.16397017240524292, "learning_rate": 1.7752590680389382e-05, "loss": 0.5348, "step": 3006 }, { "epoch": 0.6707561900513049, "grad_norm": 0.17470906674861908, "learning_rate": 1.7751103937694748e-05, "loss": 0.5056, "step": 3007 }, { "epoch": 0.6709792549631943, "grad_norm": 0.16147972643375397, "learning_rate": 1.774961676569522e-05, "loss": 0.4845, "step": 3008 }, { "epoch": 0.6712023198750836, "grad_norm": 0.15075084567070007, "learning_rate": 1.774812916447317e-05, "loss": 0.4873, "step": 3009 }, { "epoch": 0.671425384786973, "grad_norm": 0.1605006605386734, "learning_rate": 1.774664113411099e-05, "loss": 0.4885, "step": 3010 }, { "epoch": 0.6716484496988624, "grad_norm": 0.17120316624641418, "learning_rate": 1.7745152674691093e-05, "loss": 0.4952, "step": 3011 }, { "epoch": 0.6718715146107517, "grad_norm": 0.17164325714111328, "learning_rate": 1.774366378629592e-05, "loss": 0.5225, "step": 3012 }, { "epoch": 0.6720945795226411, "grad_norm": 0.16912591457366943, "learning_rate": 1.774217446900794e-05, "loss": 0.5165, "step": 3013 }, { "epoch": 0.6723176444345305, "grad_norm": 0.1592075079679489, "learning_rate": 1.7740684722909638e-05, "loss": 0.481, "step": 3014 }, { "epoch": 0.6725407093464199, "grad_norm": 0.15714260935783386, "learning_rate": 1.7739194548083526e-05, "loss": 0.5122, "step": 3015 }, { "epoch": 0.6727637742583091, "grad_norm": 0.15991204977035522, "learning_rate": 1.7737703944612135e-05, "loss": 0.5006, "step": 3016 }, { "epoch": 0.6729868391701985, "grad_norm": 0.15693789720535278, "learning_rate": 1.7736212912578028e-05, "loss": 0.4867, "step": 3017 }, { "epoch": 0.6732099040820879, "grad_norm": 0.15840266644954681, "learning_rate": 1.773472145206379e-05, "loss": 0.4937, "step": 3018 }, { "epoch": 0.6734329689939772, "grad_norm": 0.16329781711101532, "learning_rate": 1.7733229563152024e-05, "loss": 0.489, "step": 3019 }, { "epoch": 0.6736560339058666, "grad_norm": 0.17248262465000153, "learning_rate": 1.7731737245925357e-05, "loss": 0.5112, "step": 3020 }, { "epoch": 0.673879098817756, "grad_norm": 0.1815134584903717, "learning_rate": 1.7730244500466454e-05, "loss": 0.5027, "step": 3021 }, { "epoch": 0.6741021637296454, "grad_norm": 0.17082563042640686, "learning_rate": 1.772875132685798e-05, "loss": 0.5018, "step": 3022 }, { "epoch": 0.6743252286415347, "grad_norm": 0.17142242193222046, "learning_rate": 1.772725772518264e-05, "loss": 0.5106, "step": 3023 }, { "epoch": 0.674548293553424, "grad_norm": 0.17516618967056274, "learning_rate": 1.7725763695523166e-05, "loss": 0.4743, "step": 3024 }, { "epoch": 0.6747713584653134, "grad_norm": 0.16038702428340912, "learning_rate": 1.77242692379623e-05, "loss": 0.5071, "step": 3025 }, { "epoch": 0.6749944233772027, "grad_norm": 0.16445225477218628, "learning_rate": 1.7722774352582816e-05, "loss": 0.4992, "step": 3026 }, { "epoch": 0.6752174882890921, "grad_norm": 0.16889688372612, "learning_rate": 1.772127903946751e-05, "loss": 0.4909, "step": 3027 }, { "epoch": 0.6754405532009815, "grad_norm": 0.17117194831371307, "learning_rate": 1.77197832986992e-05, "loss": 0.4927, "step": 3028 }, { "epoch": 0.6756636181128709, "grad_norm": 0.16765987873077393, "learning_rate": 1.7718287130360733e-05, "loss": 0.5065, "step": 3029 }, { "epoch": 0.6758866830247602, "grad_norm": 0.1618220955133438, "learning_rate": 1.7716790534534977e-05, "loss": 0.4931, "step": 3030 }, { "epoch": 0.6761097479366496, "grad_norm": 0.16639363765716553, "learning_rate": 1.7715293511304815e-05, "loss": 0.5044, "step": 3031 }, { "epoch": 0.676332812848539, "grad_norm": 0.167351633310318, "learning_rate": 1.7713796060753173e-05, "loss": 0.5188, "step": 3032 }, { "epoch": 0.6765558777604282, "grad_norm": 0.1656593531370163, "learning_rate": 1.771229818296298e-05, "loss": 0.5271, "step": 3033 }, { "epoch": 0.6767789426723176, "grad_norm": 0.168874591588974, "learning_rate": 1.7710799878017203e-05, "loss": 0.4947, "step": 3034 }, { "epoch": 0.677002007584207, "grad_norm": 0.16307704150676727, "learning_rate": 1.7709301145998827e-05, "loss": 0.5071, "step": 3035 }, { "epoch": 0.6772250724960963, "grad_norm": 0.1524697244167328, "learning_rate": 1.7707801986990857e-05, "loss": 0.497, "step": 3036 }, { "epoch": 0.6774481374079857, "grad_norm": 0.16599062085151672, "learning_rate": 1.7706302401076327e-05, "loss": 0.4726, "step": 3037 }, { "epoch": 0.6776712023198751, "grad_norm": 0.16413775086402893, "learning_rate": 1.77048023883383e-05, "loss": 0.527, "step": 3038 }, { "epoch": 0.6778942672317645, "grad_norm": 0.1584494262933731, "learning_rate": 1.770330194885985e-05, "loss": 0.5095, "step": 3039 }, { "epoch": 0.6781173321436538, "grad_norm": 0.15526416897773743, "learning_rate": 1.7701801082724084e-05, "loss": 0.4914, "step": 3040 }, { "epoch": 0.6783403970555432, "grad_norm": 0.16402754187583923, "learning_rate": 1.7700299790014126e-05, "loss": 0.5184, "step": 3041 }, { "epoch": 0.6785634619674326, "grad_norm": 0.16508348286151886, "learning_rate": 1.769879807081313e-05, "loss": 0.4783, "step": 3042 }, { "epoch": 0.6787865268793218, "grad_norm": 0.15416789054870605, "learning_rate": 1.769729592520427e-05, "loss": 0.4818, "step": 3043 }, { "epoch": 0.6790095917912112, "grad_norm": 0.27602240443229675, "learning_rate": 1.769579335327074e-05, "loss": 0.49, "step": 3044 }, { "epoch": 0.6792326567031006, "grad_norm": 0.15309108793735504, "learning_rate": 1.7694290355095768e-05, "loss": 0.4778, "step": 3045 }, { "epoch": 0.67945572161499, "grad_norm": 0.15830327570438385, "learning_rate": 1.76927869307626e-05, "loss": 0.4909, "step": 3046 }, { "epoch": 0.6796787865268793, "grad_norm": 0.16737417876720428, "learning_rate": 1.76912830803545e-05, "loss": 0.4642, "step": 3047 }, { "epoch": 0.6799018514387687, "grad_norm": 0.16713353991508484, "learning_rate": 1.7689778803954764e-05, "loss": 0.5076, "step": 3048 }, { "epoch": 0.6801249163506581, "grad_norm": 0.15644113719463348, "learning_rate": 1.7688274101646702e-05, "loss": 0.4746, "step": 3049 }, { "epoch": 0.6803479812625474, "grad_norm": 0.1714516133069992, "learning_rate": 1.7686768973513663e-05, "loss": 0.4924, "step": 3050 }, { "epoch": 0.6805710461744368, "grad_norm": 0.1625642329454422, "learning_rate": 1.7685263419639008e-05, "loss": 0.5103, "step": 3051 }, { "epoch": 0.6807941110863261, "grad_norm": 0.15343210101127625, "learning_rate": 1.768375744010612e-05, "loss": 0.4871, "step": 3052 }, { "epoch": 0.6810171759982154, "grad_norm": 0.1622699499130249, "learning_rate": 1.7682251034998413e-05, "loss": 0.5093, "step": 3053 }, { "epoch": 0.6812402409101048, "grad_norm": 0.15701012313365936, "learning_rate": 1.768074420439932e-05, "loss": 0.5052, "step": 3054 }, { "epoch": 0.6814633058219942, "grad_norm": 0.1647741198539734, "learning_rate": 1.76792369483923e-05, "loss": 0.528, "step": 3055 }, { "epoch": 0.6816863707338836, "grad_norm": 0.16413739323616028, "learning_rate": 1.7677729267060836e-05, "loss": 0.5029, "step": 3056 }, { "epoch": 0.6819094356457729, "grad_norm": 0.1663079708814621, "learning_rate": 1.7676221160488426e-05, "loss": 0.4961, "step": 3057 }, { "epoch": 0.6821325005576623, "grad_norm": 0.1598016768693924, "learning_rate": 1.7674712628758603e-05, "loss": 0.5201, "step": 3058 }, { "epoch": 0.6823555654695517, "grad_norm": 0.18910476565361023, "learning_rate": 1.767320367195492e-05, "loss": 0.5203, "step": 3059 }, { "epoch": 0.682578630381441, "grad_norm": 0.1525489091873169, "learning_rate": 1.767169429016095e-05, "loss": 0.4609, "step": 3060 }, { "epoch": 0.6828016952933303, "grad_norm": 0.22233475744724274, "learning_rate": 1.7670184483460296e-05, "loss": 0.4936, "step": 3061 }, { "epoch": 0.6830247602052197, "grad_norm": 0.16714778542518616, "learning_rate": 1.766867425193658e-05, "loss": 0.5248, "step": 3062 }, { "epoch": 0.6832478251171091, "grad_norm": 0.1673208624124527, "learning_rate": 1.766716359567344e-05, "loss": 0.5056, "step": 3063 }, { "epoch": 0.6834708900289984, "grad_norm": 0.1797683835029602, "learning_rate": 1.7665652514754554e-05, "loss": 0.5039, "step": 3064 }, { "epoch": 0.6836939549408878, "grad_norm": 0.1645476371049881, "learning_rate": 1.7664141009263614e-05, "loss": 0.4907, "step": 3065 }, { "epoch": 0.6839170198527772, "grad_norm": 0.15605735778808594, "learning_rate": 1.7662629079284336e-05, "loss": 0.4821, "step": 3066 }, { "epoch": 0.6841400847646665, "grad_norm": 0.16608816385269165, "learning_rate": 1.7661116724900456e-05, "loss": 0.5028, "step": 3067 }, { "epoch": 0.6843631496765559, "grad_norm": 0.16926267743110657, "learning_rate": 1.7659603946195746e-05, "loss": 0.4933, "step": 3068 }, { "epoch": 0.6845862145884453, "grad_norm": 0.15846210718154907, "learning_rate": 1.7658090743253985e-05, "loss": 0.4991, "step": 3069 }, { "epoch": 0.6848092795003345, "grad_norm": 0.16848038136959076, "learning_rate": 1.7656577116158988e-05, "loss": 0.5002, "step": 3070 }, { "epoch": 0.6850323444122239, "grad_norm": 0.16113083064556122, "learning_rate": 1.765506306499459e-05, "loss": 0.4999, "step": 3071 }, { "epoch": 0.6852554093241133, "grad_norm": 0.1538555920124054, "learning_rate": 1.7653548589844648e-05, "loss": 0.4812, "step": 3072 }, { "epoch": 0.6854784742360027, "grad_norm": 0.16473166644573212, "learning_rate": 1.765203369079304e-05, "loss": 0.4885, "step": 3073 }, { "epoch": 0.685701539147892, "grad_norm": 0.15544745326042175, "learning_rate": 1.765051836792367e-05, "loss": 0.4901, "step": 3074 }, { "epoch": 0.6859246040597814, "grad_norm": 0.1728777289390564, "learning_rate": 1.764900262132048e-05, "loss": 0.4578, "step": 3075 }, { "epoch": 0.6861476689716708, "grad_norm": 0.16069582104682922, "learning_rate": 1.76474864510674e-05, "loss": 0.4904, "step": 3076 }, { "epoch": 0.6863707338835601, "grad_norm": 0.15537656843662262, "learning_rate": 1.764596985724842e-05, "loss": 0.4899, "step": 3077 }, { "epoch": 0.6865937987954495, "grad_norm": 0.16955626010894775, "learning_rate": 1.7644452839947536e-05, "loss": 0.5011, "step": 3078 }, { "epoch": 0.6868168637073389, "grad_norm": 0.16193504631519318, "learning_rate": 1.7642935399248765e-05, "loss": 0.5128, "step": 3079 }, { "epoch": 0.6870399286192282, "grad_norm": 0.1631263643503189, "learning_rate": 1.7641417535236155e-05, "loss": 0.5026, "step": 3080 }, { "epoch": 0.6872629935311175, "grad_norm": 0.18346811830997467, "learning_rate": 1.7639899247993775e-05, "loss": 0.4857, "step": 3081 }, { "epoch": 0.6874860584430069, "grad_norm": 0.16757658123970032, "learning_rate": 1.7638380537605722e-05, "loss": 0.4935, "step": 3082 }, { "epoch": 0.6877091233548963, "grad_norm": 0.18027780950069427, "learning_rate": 1.7636861404156106e-05, "loss": 0.5448, "step": 3083 }, { "epoch": 0.6879321882667856, "grad_norm": 0.17672498524188995, "learning_rate": 1.763534184772907e-05, "loss": 0.5054, "step": 3084 }, { "epoch": 0.688155253178675, "grad_norm": 0.15495164692401886, "learning_rate": 1.763382186840877e-05, "loss": 0.4778, "step": 3085 }, { "epoch": 0.6883783180905644, "grad_norm": 0.16750416159629822, "learning_rate": 1.76323014662794e-05, "loss": 0.5147, "step": 3086 }, { "epoch": 0.6886013830024537, "grad_norm": 0.16243582963943481, "learning_rate": 1.763078064142516e-05, "loss": 0.5029, "step": 3087 }, { "epoch": 0.688824447914343, "grad_norm": 0.1765555739402771, "learning_rate": 1.7629259393930292e-05, "loss": 0.5402, "step": 3088 }, { "epoch": 0.6890475128262324, "grad_norm": 0.17327693104743958, "learning_rate": 1.7627737723879048e-05, "loss": 0.5223, "step": 3089 }, { "epoch": 0.6892705777381218, "grad_norm": 0.16204944252967834, "learning_rate": 1.762621563135571e-05, "loss": 0.4926, "step": 3090 }, { "epoch": 0.6894936426500111, "grad_norm": 0.16001787781715393, "learning_rate": 1.762469311644458e-05, "loss": 0.5038, "step": 3091 }, { "epoch": 0.6897167075619005, "grad_norm": 0.15672995150089264, "learning_rate": 1.7623170179229982e-05, "loss": 0.4752, "step": 3092 }, { "epoch": 0.6899397724737899, "grad_norm": 0.16134774684906006, "learning_rate": 1.7621646819796264e-05, "loss": 0.4911, "step": 3093 }, { "epoch": 0.6901628373856792, "grad_norm": 0.18240399658679962, "learning_rate": 1.762012303822781e-05, "loss": 0.4956, "step": 3094 }, { "epoch": 0.6903859022975686, "grad_norm": 0.16389527916908264, "learning_rate": 1.761859883460901e-05, "loss": 0.4713, "step": 3095 }, { "epoch": 0.690608967209458, "grad_norm": 0.16408023238182068, "learning_rate": 1.761707420902428e-05, "loss": 0.4925, "step": 3096 }, { "epoch": 0.6908320321213474, "grad_norm": 0.16707156598567963, "learning_rate": 1.761554916155807e-05, "loss": 0.5094, "step": 3097 }, { "epoch": 0.6910550970332366, "grad_norm": 0.1580830216407776, "learning_rate": 1.7614023692294838e-05, "loss": 0.4896, "step": 3098 }, { "epoch": 0.691278161945126, "grad_norm": 0.1949886530637741, "learning_rate": 1.7612497801319084e-05, "loss": 0.4919, "step": 3099 }, { "epoch": 0.6915012268570154, "grad_norm": 0.1662502884864807, "learning_rate": 1.7610971488715315e-05, "loss": 0.5104, "step": 3100 }, { "epoch": 0.6917242917689047, "grad_norm": 0.15772640705108643, "learning_rate": 1.760944475456807e-05, "loss": 0.4699, "step": 3101 }, { "epoch": 0.6919473566807941, "grad_norm": 0.17815501987934113, "learning_rate": 1.760791759896191e-05, "loss": 0.5133, "step": 3102 }, { "epoch": 0.6921704215926835, "grad_norm": 0.16980375349521637, "learning_rate": 1.760639002198142e-05, "loss": 0.4874, "step": 3103 }, { "epoch": 0.6923934865045729, "grad_norm": 0.16098888218402863, "learning_rate": 1.7604862023711204e-05, "loss": 0.508, "step": 3104 }, { "epoch": 0.6926165514164622, "grad_norm": 0.16175471246242523, "learning_rate": 1.760333360423589e-05, "loss": 0.4963, "step": 3105 }, { "epoch": 0.6928396163283516, "grad_norm": 0.1580919772386551, "learning_rate": 1.7601804763640137e-05, "loss": 0.4674, "step": 3106 }, { "epoch": 0.693062681240241, "grad_norm": 0.15538008511066437, "learning_rate": 1.7600275502008618e-05, "loss": 0.4907, "step": 3107 }, { "epoch": 0.6932857461521302, "grad_norm": 0.15990300476551056, "learning_rate": 1.7598745819426034e-05, "loss": 0.5035, "step": 3108 }, { "epoch": 0.6935088110640196, "grad_norm": 0.1725892871618271, "learning_rate": 1.759721571597711e-05, "loss": 0.5196, "step": 3109 }, { "epoch": 0.693731875975909, "grad_norm": 0.16250944137573242, "learning_rate": 1.7595685191746586e-05, "loss": 0.4907, "step": 3110 }, { "epoch": 0.6939549408877983, "grad_norm": 0.16884207725524902, "learning_rate": 1.759415424681924e-05, "loss": 0.535, "step": 3111 }, { "epoch": 0.6941780057996877, "grad_norm": 0.1701684594154358, "learning_rate": 1.7592622881279867e-05, "loss": 0.4878, "step": 3112 }, { "epoch": 0.6944010707115771, "grad_norm": 0.17077569663524628, "learning_rate": 1.7591091095213277e-05, "loss": 0.4851, "step": 3113 }, { "epoch": 0.6946241356234665, "grad_norm": 0.33712977170944214, "learning_rate": 1.758955888870431e-05, "loss": 0.476, "step": 3114 }, { "epoch": 0.6948472005353558, "grad_norm": 0.16534623503684998, "learning_rate": 1.7588026261837833e-05, "loss": 0.4962, "step": 3115 }, { "epoch": 0.6950702654472452, "grad_norm": 0.17375873029232025, "learning_rate": 1.758649321469873e-05, "loss": 0.4883, "step": 3116 }, { "epoch": 0.6952933303591345, "grad_norm": 0.16607031226158142, "learning_rate": 1.758495974737191e-05, "loss": 0.4727, "step": 3117 }, { "epoch": 0.6955163952710238, "grad_norm": 0.16579467058181763, "learning_rate": 1.7583425859942312e-05, "loss": 0.5156, "step": 3118 }, { "epoch": 0.6957394601829132, "grad_norm": 0.16067442297935486, "learning_rate": 1.7581891552494886e-05, "loss": 0.4797, "step": 3119 }, { "epoch": 0.6959625250948026, "grad_norm": 0.16372907161712646, "learning_rate": 1.7580356825114616e-05, "loss": 0.4918, "step": 3120 }, { "epoch": 0.696185590006692, "grad_norm": 0.1595798134803772, "learning_rate": 1.75788216778865e-05, "loss": 0.4963, "step": 3121 }, { "epoch": 0.6964086549185813, "grad_norm": 0.16394098103046417, "learning_rate": 1.757728611089557e-05, "loss": 0.5189, "step": 3122 }, { "epoch": 0.6966317198304707, "grad_norm": 0.16349278390407562, "learning_rate": 1.757575012422687e-05, "loss": 0.5392, "step": 3123 }, { "epoch": 0.6968547847423601, "grad_norm": 0.16474361717700958, "learning_rate": 1.7574213717965473e-05, "loss": 0.498, "step": 3124 }, { "epoch": 0.6970778496542493, "grad_norm": 0.16759975254535675, "learning_rate": 1.757267689219648e-05, "loss": 0.4909, "step": 3125 }, { "epoch": 0.6973009145661387, "grad_norm": 0.18048445880413055, "learning_rate": 1.7571139647005004e-05, "loss": 0.5313, "step": 3126 }, { "epoch": 0.6975239794780281, "grad_norm": 0.21055570244789124, "learning_rate": 1.7569601982476194e-05, "loss": 0.4773, "step": 3127 }, { "epoch": 0.6977470443899174, "grad_norm": 0.16457606852054596, "learning_rate": 1.7568063898695205e-05, "loss": 0.4817, "step": 3128 }, { "epoch": 0.6979701093018068, "grad_norm": 0.15786704421043396, "learning_rate": 1.7566525395747237e-05, "loss": 0.4793, "step": 3129 }, { "epoch": 0.6981931742136962, "grad_norm": 0.17408685386180878, "learning_rate": 1.7564986473717498e-05, "loss": 0.5238, "step": 3130 }, { "epoch": 0.6984162391255856, "grad_norm": 0.1875070482492447, "learning_rate": 1.7563447132691222e-05, "loss": 0.5133, "step": 3131 }, { "epoch": 0.6986393040374749, "grad_norm": 0.15555784106254578, "learning_rate": 1.7561907372753665e-05, "loss": 0.4851, "step": 3132 }, { "epoch": 0.6988623689493643, "grad_norm": 0.15845829248428345, "learning_rate": 1.756036719399011e-05, "loss": 0.4989, "step": 3133 }, { "epoch": 0.6990854338612537, "grad_norm": 0.20709070563316345, "learning_rate": 1.7558826596485866e-05, "loss": 0.4939, "step": 3134 }, { "epoch": 0.6993084987731429, "grad_norm": 0.17586678266525269, "learning_rate": 1.755728558032626e-05, "loss": 0.5028, "step": 3135 }, { "epoch": 0.6995315636850323, "grad_norm": 0.16700804233551025, "learning_rate": 1.7555744145596638e-05, "loss": 0.4955, "step": 3136 }, { "epoch": 0.6997546285969217, "grad_norm": 0.17724819481372833, "learning_rate": 1.755420229238238e-05, "loss": 0.4571, "step": 3137 }, { "epoch": 0.6999776935088111, "grad_norm": 0.17517662048339844, "learning_rate": 1.755266002076888e-05, "loss": 0.5144, "step": 3138 }, { "epoch": 0.7002007584207004, "grad_norm": 0.16020509600639343, "learning_rate": 1.755111733084156e-05, "loss": 0.4523, "step": 3139 }, { "epoch": 0.7004238233325898, "grad_norm": 0.15964041650295258, "learning_rate": 1.7549574222685864e-05, "loss": 0.4695, "step": 3140 }, { "epoch": 0.7006468882444792, "grad_norm": 0.17395976185798645, "learning_rate": 1.754803069638726e-05, "loss": 0.515, "step": 3141 }, { "epoch": 0.7008699531563685, "grad_norm": 0.15984676778316498, "learning_rate": 1.7546486752031237e-05, "loss": 0.4909, "step": 3142 }, { "epoch": 0.7010930180682579, "grad_norm": 0.16784314811229706, "learning_rate": 1.7544942389703305e-05, "loss": 0.4973, "step": 3143 }, { "epoch": 0.7013160829801472, "grad_norm": 0.16454213857650757, "learning_rate": 1.754339760948901e-05, "loss": 0.503, "step": 3144 }, { "epoch": 0.7015391478920365, "grad_norm": 0.1578417867422104, "learning_rate": 1.7541852411473902e-05, "loss": 0.4632, "step": 3145 }, { "epoch": 0.7017622128039259, "grad_norm": 0.16670000553131104, "learning_rate": 1.7540306795743566e-05, "loss": 0.4804, "step": 3146 }, { "epoch": 0.7019852777158153, "grad_norm": 0.15188740193843842, "learning_rate": 1.753876076238361e-05, "loss": 0.4884, "step": 3147 }, { "epoch": 0.7022083426277047, "grad_norm": 0.16512979567050934, "learning_rate": 1.7537214311479663e-05, "loss": 0.5002, "step": 3148 }, { "epoch": 0.702431407539594, "grad_norm": 0.15965288877487183, "learning_rate": 1.7535667443117377e-05, "loss": 0.4885, "step": 3149 }, { "epoch": 0.7026544724514834, "grad_norm": 0.1552652269601822, "learning_rate": 1.7534120157382425e-05, "loss": 0.4955, "step": 3150 }, { "epoch": 0.7028775373633728, "grad_norm": 0.16739974915981293, "learning_rate": 1.7532572454360506e-05, "loss": 0.4902, "step": 3151 }, { "epoch": 0.703100602275262, "grad_norm": 0.18262702226638794, "learning_rate": 1.7531024334137348e-05, "loss": 0.5283, "step": 3152 }, { "epoch": 0.7033236671871514, "grad_norm": 0.15477107465267181, "learning_rate": 1.7529475796798686e-05, "loss": 0.4969, "step": 3153 }, { "epoch": 0.7035467320990408, "grad_norm": 0.16841940581798553, "learning_rate": 1.7527926842430295e-05, "loss": 0.5351, "step": 3154 }, { "epoch": 0.7037697970109302, "grad_norm": 0.17480523884296417, "learning_rate": 1.7526377471117963e-05, "loss": 0.5279, "step": 3155 }, { "epoch": 0.7039928619228195, "grad_norm": 0.1635589897632599, "learning_rate": 1.75248276829475e-05, "loss": 0.5076, "step": 3156 }, { "epoch": 0.7042159268347089, "grad_norm": 0.18227113783359528, "learning_rate": 1.7523277478004747e-05, "loss": 0.4927, "step": 3157 }, { "epoch": 0.7044389917465983, "grad_norm": 0.17305073142051697, "learning_rate": 1.7521726856375568e-05, "loss": 0.4967, "step": 3158 }, { "epoch": 0.7046620566584876, "grad_norm": 0.16051623225212097, "learning_rate": 1.7520175818145838e-05, "loss": 0.5031, "step": 3159 }, { "epoch": 0.704885121570377, "grad_norm": 0.16289980709552765, "learning_rate": 1.751862436340147e-05, "loss": 0.5101, "step": 3160 }, { "epoch": 0.7051081864822664, "grad_norm": 0.16630062460899353, "learning_rate": 1.751707249222839e-05, "loss": 0.4928, "step": 3161 }, { "epoch": 0.7053312513941556, "grad_norm": 0.154288649559021, "learning_rate": 1.7515520204712552e-05, "loss": 0.4921, "step": 3162 }, { "epoch": 0.705554316306045, "grad_norm": 0.1542695015668869, "learning_rate": 1.751396750093993e-05, "loss": 0.4929, "step": 3163 }, { "epoch": 0.7057773812179344, "grad_norm": 0.1700357347726822, "learning_rate": 1.7512414380996524e-05, "loss": 0.5014, "step": 3164 }, { "epoch": 0.7060004461298238, "grad_norm": 0.1531635820865631, "learning_rate": 1.7510860844968355e-05, "loss": 0.4764, "step": 3165 }, { "epoch": 0.7062235110417131, "grad_norm": 0.1919977366924286, "learning_rate": 1.7509306892941464e-05, "loss": 0.5453, "step": 3166 }, { "epoch": 0.7064465759536025, "grad_norm": 0.16512922942638397, "learning_rate": 1.7507752525001924e-05, "loss": 0.5015, "step": 3167 }, { "epoch": 0.7066696408654919, "grad_norm": 0.1677495837211609, "learning_rate": 1.7506197741235822e-05, "loss": 0.5164, "step": 3168 }, { "epoch": 0.7068927057773812, "grad_norm": 0.15319664776325226, "learning_rate": 1.7504642541729273e-05, "loss": 0.4815, "step": 3169 }, { "epoch": 0.7071157706892706, "grad_norm": 0.16099566221237183, "learning_rate": 1.7503086926568416e-05, "loss": 0.4966, "step": 3170 }, { "epoch": 0.70733883560116, "grad_norm": 0.15989680588245392, "learning_rate": 1.750153089583941e-05, "loss": 0.4949, "step": 3171 }, { "epoch": 0.7075619005130493, "grad_norm": 0.16961508989334106, "learning_rate": 1.7499974449628433e-05, "loss": 0.4916, "step": 3172 }, { "epoch": 0.7077849654249386, "grad_norm": 0.1632029265165329, "learning_rate": 1.74984175880217e-05, "loss": 0.4921, "step": 3173 }, { "epoch": 0.708008030336828, "grad_norm": 0.1647975593805313, "learning_rate": 1.7496860311105426e-05, "loss": 0.4635, "step": 3174 }, { "epoch": 0.7082310952487174, "grad_norm": 0.15906676650047302, "learning_rate": 1.7495302618965874e-05, "loss": 0.4686, "step": 3175 }, { "epoch": 0.7084541601606067, "grad_norm": 0.16585782170295715, "learning_rate": 1.7493744511689316e-05, "loss": 0.5159, "step": 3176 }, { "epoch": 0.7086772250724961, "grad_norm": 0.16121666133403778, "learning_rate": 1.7492185989362052e-05, "loss": 0.5, "step": 3177 }, { "epoch": 0.7089002899843855, "grad_norm": 0.1639820635318756, "learning_rate": 1.7490627052070394e-05, "loss": 0.4959, "step": 3178 }, { "epoch": 0.7091233548962749, "grad_norm": 0.17450456321239471, "learning_rate": 1.74890676999007e-05, "loss": 0.4932, "step": 3179 }, { "epoch": 0.7093464198081642, "grad_norm": 0.1635378748178482, "learning_rate": 1.7487507932939324e-05, "loss": 0.5107, "step": 3180 }, { "epoch": 0.7095694847200535, "grad_norm": 0.16000831127166748, "learning_rate": 1.7485947751272657e-05, "loss": 0.462, "step": 3181 }, { "epoch": 0.7097925496319429, "grad_norm": 0.16015185415744781, "learning_rate": 1.748438715498712e-05, "loss": 0.4934, "step": 3182 }, { "epoch": 0.7100156145438322, "grad_norm": 0.1680067628622055, "learning_rate": 1.7482826144169144e-05, "loss": 0.5074, "step": 3183 }, { "epoch": 0.7102386794557216, "grad_norm": 0.16120266914367676, "learning_rate": 1.7481264718905187e-05, "loss": 0.4853, "step": 3184 }, { "epoch": 0.710461744367611, "grad_norm": 0.1535530686378479, "learning_rate": 1.747970287928173e-05, "loss": 0.4722, "step": 3185 }, { "epoch": 0.7106848092795003, "grad_norm": 0.1595955789089203, "learning_rate": 1.747814062538528e-05, "loss": 0.5031, "step": 3186 }, { "epoch": 0.7109078741913897, "grad_norm": 0.16170883178710938, "learning_rate": 1.7476577957302358e-05, "loss": 0.4947, "step": 3187 }, { "epoch": 0.7111309391032791, "grad_norm": 0.16378958523273468, "learning_rate": 1.747501487511952e-05, "loss": 0.4722, "step": 3188 }, { "epoch": 0.7113540040151685, "grad_norm": 0.17210274934768677, "learning_rate": 1.7473451378923344e-05, "loss": 0.5183, "step": 3189 }, { "epoch": 0.7115770689270577, "grad_norm": 0.15549586713314056, "learning_rate": 1.7471887468800416e-05, "loss": 0.48, "step": 3190 }, { "epoch": 0.7118001338389471, "grad_norm": 0.17286068201065063, "learning_rate": 1.747032314483736e-05, "loss": 0.4768, "step": 3191 }, { "epoch": 0.7120231987508365, "grad_norm": 0.1790837049484253, "learning_rate": 1.746875840712082e-05, "loss": 0.5181, "step": 3192 }, { "epoch": 0.7122462636627258, "grad_norm": 0.1648358702659607, "learning_rate": 1.746719325573746e-05, "loss": 0.508, "step": 3193 }, { "epoch": 0.7124693285746152, "grad_norm": 0.16890966892242432, "learning_rate": 1.7465627690773964e-05, "loss": 0.4928, "step": 3194 }, { "epoch": 0.7126923934865046, "grad_norm": 0.1561020165681839, "learning_rate": 1.7464061712317047e-05, "loss": 0.4783, "step": 3195 }, { "epoch": 0.712915458398394, "grad_norm": 0.17456288635730743, "learning_rate": 1.7462495320453442e-05, "loss": 0.5203, "step": 3196 }, { "epoch": 0.7131385233102833, "grad_norm": 0.16500715911388397, "learning_rate": 1.7460928515269902e-05, "loss": 0.5259, "step": 3197 }, { "epoch": 0.7133615882221727, "grad_norm": 0.1663879156112671, "learning_rate": 1.7459361296853217e-05, "loss": 0.5204, "step": 3198 }, { "epoch": 0.713584653134062, "grad_norm": 0.1611357033252716, "learning_rate": 1.745779366529018e-05, "loss": 0.498, "step": 3199 }, { "epoch": 0.7138077180459513, "grad_norm": 0.1556338518857956, "learning_rate": 1.7456225620667613e-05, "loss": 0.4753, "step": 3200 }, { "epoch": 0.7140307829578407, "grad_norm": 0.16986200213432312, "learning_rate": 1.7454657163072372e-05, "loss": 0.5075, "step": 3201 }, { "epoch": 0.7142538478697301, "grad_norm": 0.16040728986263275, "learning_rate": 1.7453088292591327e-05, "loss": 0.5016, "step": 3202 }, { "epoch": 0.7144769127816194, "grad_norm": 0.16356582939624786, "learning_rate": 1.7451519009311368e-05, "loss": 0.5118, "step": 3203 }, { "epoch": 0.7146999776935088, "grad_norm": 0.16113781929016113, "learning_rate": 1.744994931331942e-05, "loss": 0.4869, "step": 3204 }, { "epoch": 0.7149230426053982, "grad_norm": 0.17623494565486908, "learning_rate": 1.744837920470241e-05, "loss": 0.5247, "step": 3205 }, { "epoch": 0.7151461075172876, "grad_norm": 0.16005289554595947, "learning_rate": 1.744680868354731e-05, "loss": 0.4722, "step": 3206 }, { "epoch": 0.7153691724291769, "grad_norm": 0.18001532554626465, "learning_rate": 1.7445237749941106e-05, "loss": 0.4879, "step": 3207 }, { "epoch": 0.7155922373410663, "grad_norm": 0.1617603451013565, "learning_rate": 1.74436664039708e-05, "loss": 0.4962, "step": 3208 }, { "epoch": 0.7158153022529556, "grad_norm": 0.16146957874298096, "learning_rate": 1.7442094645723425e-05, "loss": 0.4849, "step": 3209 }, { "epoch": 0.7160383671648449, "grad_norm": 0.16159506142139435, "learning_rate": 1.744052247528604e-05, "loss": 0.4814, "step": 3210 }, { "epoch": 0.7162614320767343, "grad_norm": 0.16017811000347137, "learning_rate": 1.7438949892745717e-05, "loss": 0.4779, "step": 3211 }, { "epoch": 0.7164844969886237, "grad_norm": 0.16643457114696503, "learning_rate": 1.7437376898189554e-05, "loss": 0.5058, "step": 3212 }, { "epoch": 0.7167075619005131, "grad_norm": 0.174330934882164, "learning_rate": 1.7435803491704674e-05, "loss": 0.5037, "step": 3213 }, { "epoch": 0.7169306268124024, "grad_norm": 0.16516099870204926, "learning_rate": 1.7434229673378226e-05, "loss": 0.4929, "step": 3214 }, { "epoch": 0.7171536917242918, "grad_norm": 0.16369006037712097, "learning_rate": 1.7432655443297377e-05, "loss": 0.4788, "step": 3215 }, { "epoch": 0.7173767566361812, "grad_norm": 0.19648444652557373, "learning_rate": 1.7431080801549313e-05, "loss": 0.4808, "step": 3216 }, { "epoch": 0.7175998215480704, "grad_norm": 0.16290031373500824, "learning_rate": 1.742950574822125e-05, "loss": 0.4811, "step": 3217 }, { "epoch": 0.7178228864599598, "grad_norm": 0.15613336861133575, "learning_rate": 1.7427930283400428e-05, "loss": 0.4948, "step": 3218 }, { "epoch": 0.7180459513718492, "grad_norm": 0.17377032339572906, "learning_rate": 1.7426354407174102e-05, "loss": 0.5127, "step": 3219 }, { "epoch": 0.7182690162837385, "grad_norm": 0.16334526240825653, "learning_rate": 1.7424778119629556e-05, "loss": 0.4865, "step": 3220 }, { "epoch": 0.7184920811956279, "grad_norm": 0.15451814234256744, "learning_rate": 1.7423201420854092e-05, "loss": 0.4711, "step": 3221 }, { "epoch": 0.7187151461075173, "grad_norm": 0.17903545498847961, "learning_rate": 1.7421624310935043e-05, "loss": 0.5265, "step": 3222 }, { "epoch": 0.7189382110194067, "grad_norm": 0.15851649641990662, "learning_rate": 1.7420046789959754e-05, "loss": 0.5125, "step": 3223 }, { "epoch": 0.719161275931296, "grad_norm": 0.15990275144577026, "learning_rate": 1.74184688580156e-05, "loss": 0.4983, "step": 3224 }, { "epoch": 0.7193843408431854, "grad_norm": 0.16905120015144348, "learning_rate": 1.7416890515189977e-05, "loss": 0.514, "step": 3225 }, { "epoch": 0.7196074057550748, "grad_norm": 0.16614043712615967, "learning_rate": 1.74153117615703e-05, "loss": 0.5071, "step": 3226 }, { "epoch": 0.719830470666964, "grad_norm": 0.1659470647573471, "learning_rate": 1.741373259724402e-05, "loss": 0.5101, "step": 3227 }, { "epoch": 0.7200535355788534, "grad_norm": 0.17081058025360107, "learning_rate": 1.7412153022298587e-05, "loss": 0.5345, "step": 3228 }, { "epoch": 0.7202766004907428, "grad_norm": 0.17581294476985931, "learning_rate": 1.74105730368215e-05, "loss": 0.5119, "step": 3229 }, { "epoch": 0.7204996654026322, "grad_norm": 0.15835914015769958, "learning_rate": 1.7408992640900263e-05, "loss": 0.5, "step": 3230 }, { "epoch": 0.7207227303145215, "grad_norm": 0.16518062353134155, "learning_rate": 1.740741183462241e-05, "loss": 0.4791, "step": 3231 }, { "epoch": 0.7209457952264109, "grad_norm": 0.19645792245864868, "learning_rate": 1.7405830618075494e-05, "loss": 0.4851, "step": 3232 }, { "epoch": 0.7211688601383003, "grad_norm": 0.1701032668352127, "learning_rate": 1.7404248991347093e-05, "loss": 0.5165, "step": 3233 }, { "epoch": 0.7213919250501896, "grad_norm": 0.16348305344581604, "learning_rate": 1.740266695452481e-05, "loss": 0.5236, "step": 3234 }, { "epoch": 0.721614989962079, "grad_norm": 0.1572495847940445, "learning_rate": 1.7401084507696263e-05, "loss": 0.5141, "step": 3235 }, { "epoch": 0.7218380548739683, "grad_norm": 0.16454187035560608, "learning_rate": 1.7399501650949107e-05, "loss": 0.4714, "step": 3236 }, { "epoch": 0.7220611197858576, "grad_norm": 0.17999160289764404, "learning_rate": 1.7397918384371003e-05, "loss": 0.5035, "step": 3237 }, { "epoch": 0.722284184697747, "grad_norm": 0.16409459710121155, "learning_rate": 1.739633470804964e-05, "loss": 0.4903, "step": 3238 }, { "epoch": 0.7225072496096364, "grad_norm": 0.15939798951148987, "learning_rate": 1.739475062207274e-05, "loss": 0.4968, "step": 3239 }, { "epoch": 0.7227303145215258, "grad_norm": 0.1598650962114334, "learning_rate": 1.7393166126528035e-05, "loss": 0.4829, "step": 3240 }, { "epoch": 0.7229533794334151, "grad_norm": 0.1741870641708374, "learning_rate": 1.7391581221503286e-05, "loss": 0.5268, "step": 3241 }, { "epoch": 0.7231764443453045, "grad_norm": 0.16262130439281464, "learning_rate": 1.7389995907086273e-05, "loss": 0.4814, "step": 3242 }, { "epoch": 0.7233995092571939, "grad_norm": 0.16130922734737396, "learning_rate": 1.73884101833648e-05, "loss": 0.48, "step": 3243 }, { "epoch": 0.7236225741690832, "grad_norm": 0.167790949344635, "learning_rate": 1.7386824050426697e-05, "loss": 0.4774, "step": 3244 }, { "epoch": 0.7238456390809725, "grad_norm": 0.16130533814430237, "learning_rate": 1.7385237508359812e-05, "loss": 0.4765, "step": 3245 }, { "epoch": 0.7240687039928619, "grad_norm": 0.16216439008712769, "learning_rate": 1.7383650557252023e-05, "loss": 0.5197, "step": 3246 }, { "epoch": 0.7242917689047513, "grad_norm": 0.17525294423103333, "learning_rate": 1.7382063197191218e-05, "loss": 0.5063, "step": 3247 }, { "epoch": 0.7245148338166406, "grad_norm": 0.16659271717071533, "learning_rate": 1.738047542826532e-05, "loss": 0.5077, "step": 3248 }, { "epoch": 0.72473789872853, "grad_norm": 0.15952268242835999, "learning_rate": 1.7378887250562268e-05, "loss": 0.4991, "step": 3249 }, { "epoch": 0.7249609636404194, "grad_norm": 0.16580569744110107, "learning_rate": 1.737729866417002e-05, "loss": 0.5026, "step": 3250 }, { "epoch": 0.7251840285523087, "grad_norm": 0.16428150236606598, "learning_rate": 1.7375709669176572e-05, "loss": 0.4993, "step": 3251 }, { "epoch": 0.7254070934641981, "grad_norm": 0.17543014883995056, "learning_rate": 1.7374120265669927e-05, "loss": 0.483, "step": 3252 }, { "epoch": 0.7256301583760875, "grad_norm": 0.16254562139511108, "learning_rate": 1.7372530453738113e-05, "loss": 0.5051, "step": 3253 }, { "epoch": 0.7258532232879769, "grad_norm": 0.16587981581687927, "learning_rate": 1.737094023346919e-05, "loss": 0.4717, "step": 3254 }, { "epoch": 0.7260762881998661, "grad_norm": 0.15644344687461853, "learning_rate": 1.7369349604951233e-05, "loss": 0.4918, "step": 3255 }, { "epoch": 0.7262993531117555, "grad_norm": 0.16663451492786407, "learning_rate": 1.736775856827234e-05, "loss": 0.4973, "step": 3256 }, { "epoch": 0.7265224180236449, "grad_norm": 0.16686449944972992, "learning_rate": 1.736616712352063e-05, "loss": 0.5115, "step": 3257 }, { "epoch": 0.7267454829355342, "grad_norm": 0.15312035381793976, "learning_rate": 1.736457527078425e-05, "loss": 0.4498, "step": 3258 }, { "epoch": 0.7269685478474236, "grad_norm": 0.17026790976524353, "learning_rate": 1.7362983010151368e-05, "loss": 0.4958, "step": 3259 }, { "epoch": 0.727191612759313, "grad_norm": 0.16290387511253357, "learning_rate": 1.7361390341710173e-05, "loss": 0.4785, "step": 3260 }, { "epoch": 0.7274146776712023, "grad_norm": 0.15380245447158813, "learning_rate": 1.7359797265548876e-05, "loss": 0.466, "step": 3261 }, { "epoch": 0.7276377425830917, "grad_norm": 0.1699349284172058, "learning_rate": 1.7358203781755707e-05, "loss": 0.4858, "step": 3262 }, { "epoch": 0.727860807494981, "grad_norm": 0.16431613266468048, "learning_rate": 1.735660989041893e-05, "loss": 0.5011, "step": 3263 }, { "epoch": 0.7280838724068704, "grad_norm": 0.16305620968341827, "learning_rate": 1.735501559162682e-05, "loss": 0.5167, "step": 3264 }, { "epoch": 0.7283069373187597, "grad_norm": 0.15818987786769867, "learning_rate": 1.7353420885467688e-05, "loss": 0.4953, "step": 3265 }, { "epoch": 0.7285300022306491, "grad_norm": 0.164206400513649, "learning_rate": 1.7351825772029847e-05, "loss": 0.5128, "step": 3266 }, { "epoch": 0.7287530671425385, "grad_norm": 0.15800419449806213, "learning_rate": 1.7350230251401653e-05, "loss": 0.4654, "step": 3267 }, { "epoch": 0.7289761320544278, "grad_norm": 0.16068902611732483, "learning_rate": 1.734863432367147e-05, "loss": 0.4997, "step": 3268 }, { "epoch": 0.7291991969663172, "grad_norm": 0.16787229478359222, "learning_rate": 1.7347037988927696e-05, "loss": 0.5244, "step": 3269 }, { "epoch": 0.7294222618782066, "grad_norm": 0.1621689349412918, "learning_rate": 1.7345441247258743e-05, "loss": 0.4855, "step": 3270 }, { "epoch": 0.729645326790096, "grad_norm": 0.16722342371940613, "learning_rate": 1.734384409875305e-05, "loss": 0.4992, "step": 3271 }, { "epoch": 0.7298683917019853, "grad_norm": 0.15855719149112701, "learning_rate": 1.7342246543499074e-05, "loss": 0.5074, "step": 3272 }, { "epoch": 0.7300914566138746, "grad_norm": 0.17619627714157104, "learning_rate": 1.7340648581585296e-05, "loss": 0.4834, "step": 3273 }, { "epoch": 0.730314521525764, "grad_norm": 0.17650535702705383, "learning_rate": 1.7339050213100233e-05, "loss": 0.5393, "step": 3274 }, { "epoch": 0.7305375864376533, "grad_norm": 0.17073291540145874, "learning_rate": 1.73374514381324e-05, "loss": 0.4873, "step": 3275 }, { "epoch": 0.7307606513495427, "grad_norm": 0.1601572334766388, "learning_rate": 1.733585225677035e-05, "loss": 0.485, "step": 3276 }, { "epoch": 0.7309837162614321, "grad_norm": 0.1590786874294281, "learning_rate": 1.7334252669102665e-05, "loss": 0.4668, "step": 3277 }, { "epoch": 0.7312067811733214, "grad_norm": 0.15466643869876862, "learning_rate": 1.7332652675217928e-05, "loss": 0.4476, "step": 3278 }, { "epoch": 0.7314298460852108, "grad_norm": 0.15521705150604248, "learning_rate": 1.733105227520476e-05, "loss": 0.4878, "step": 3279 }, { "epoch": 0.7316529109971002, "grad_norm": 0.21078747510910034, "learning_rate": 1.7329451469151807e-05, "loss": 0.5203, "step": 3280 }, { "epoch": 0.7318759759089896, "grad_norm": 0.1662643849849701, "learning_rate": 1.7327850257147724e-05, "loss": 0.5099, "step": 3281 }, { "epoch": 0.7320990408208788, "grad_norm": 0.16159231960773468, "learning_rate": 1.73262486392812e-05, "loss": 0.4944, "step": 3282 }, { "epoch": 0.7323221057327682, "grad_norm": 0.16965213418006897, "learning_rate": 1.7324646615640947e-05, "loss": 0.4966, "step": 3283 }, { "epoch": 0.7325451706446576, "grad_norm": 0.1666761040687561, "learning_rate": 1.732304418631569e-05, "loss": 0.4846, "step": 3284 }, { "epoch": 0.7327682355565469, "grad_norm": 0.16207394003868103, "learning_rate": 1.7321441351394178e-05, "loss": 0.5124, "step": 3285 }, { "epoch": 0.7329913004684363, "grad_norm": 0.15828841924667358, "learning_rate": 1.7319838110965192e-05, "loss": 0.4887, "step": 3286 }, { "epoch": 0.7332143653803257, "grad_norm": 0.16760720312595367, "learning_rate": 1.731823446511753e-05, "loss": 0.4715, "step": 3287 }, { "epoch": 0.7334374302922151, "grad_norm": 0.1632707566022873, "learning_rate": 1.7316630413940005e-05, "loss": 0.4803, "step": 3288 }, { "epoch": 0.7336604952041044, "grad_norm": 0.1794753223657608, "learning_rate": 1.7315025957521468e-05, "loss": 0.5025, "step": 3289 }, { "epoch": 0.7338835601159938, "grad_norm": 0.1697871834039688, "learning_rate": 1.7313421095950778e-05, "loss": 0.4725, "step": 3290 }, { "epoch": 0.7341066250278832, "grad_norm": 0.16839033365249634, "learning_rate": 1.7311815829316826e-05, "loss": 0.5406, "step": 3291 }, { "epoch": 0.7343296899397724, "grad_norm": 0.16735753417015076, "learning_rate": 1.731021015770852e-05, "loss": 0.5238, "step": 3292 }, { "epoch": 0.7345527548516618, "grad_norm": 0.16878098249435425, "learning_rate": 1.7308604081214793e-05, "loss": 0.4978, "step": 3293 }, { "epoch": 0.7347758197635512, "grad_norm": 0.16095402836799622, "learning_rate": 1.7306997599924597e-05, "loss": 0.4696, "step": 3294 }, { "epoch": 0.7349988846754405, "grad_norm": 0.16328908503055573, "learning_rate": 1.730539071392691e-05, "loss": 0.4965, "step": 3295 }, { "epoch": 0.7352219495873299, "grad_norm": 0.1627175509929657, "learning_rate": 1.7303783423310735e-05, "loss": 0.4822, "step": 3296 }, { "epoch": 0.7354450144992193, "grad_norm": 0.1619403213262558, "learning_rate": 1.730217572816509e-05, "loss": 0.5027, "step": 3297 }, { "epoch": 0.7356680794111087, "grad_norm": 0.16253367066383362, "learning_rate": 1.7300567628579025e-05, "loss": 0.5028, "step": 3298 }, { "epoch": 0.735891144322998, "grad_norm": 0.16807517409324646, "learning_rate": 1.72989591246416e-05, "loss": 0.4756, "step": 3299 }, { "epoch": 0.7361142092348874, "grad_norm": 0.18981721997261047, "learning_rate": 1.7297350216441903e-05, "loss": 0.5209, "step": 3300 }, { "epoch": 0.7363372741467767, "grad_norm": 0.1598469614982605, "learning_rate": 1.7295740904069053e-05, "loss": 0.4872, "step": 3301 }, { "epoch": 0.736560339058666, "grad_norm": 0.20748665928840637, "learning_rate": 1.7294131187612176e-05, "loss": 0.4603, "step": 3302 }, { "epoch": 0.7367834039705554, "grad_norm": 0.16444922983646393, "learning_rate": 1.7292521067160434e-05, "loss": 0.4772, "step": 3303 }, { "epoch": 0.7370064688824448, "grad_norm": 0.16779246926307678, "learning_rate": 1.7290910542803004e-05, "loss": 0.4948, "step": 3304 }, { "epoch": 0.7372295337943342, "grad_norm": 0.16438250243663788, "learning_rate": 1.7289299614629083e-05, "loss": 0.5103, "step": 3305 }, { "epoch": 0.7374525987062235, "grad_norm": 0.17737194895744324, "learning_rate": 1.7287688282727903e-05, "loss": 0.4994, "step": 3306 }, { "epoch": 0.7376756636181129, "grad_norm": 0.15918989479541779, "learning_rate": 1.7286076547188703e-05, "loss": 0.4978, "step": 3307 }, { "epoch": 0.7378987285300023, "grad_norm": 0.3185868561267853, "learning_rate": 1.728446440810075e-05, "loss": 0.489, "step": 3308 }, { "epoch": 0.7381217934418915, "grad_norm": 0.1764698624610901, "learning_rate": 1.728285186555334e-05, "loss": 0.506, "step": 3309 }, { "epoch": 0.7383448583537809, "grad_norm": 0.16116134822368622, "learning_rate": 1.7281238919635784e-05, "loss": 0.519, "step": 3310 }, { "epoch": 0.7385679232656703, "grad_norm": 0.1670210361480713, "learning_rate": 1.7279625570437413e-05, "loss": 0.5228, "step": 3311 }, { "epoch": 0.7387909881775596, "grad_norm": 0.16115038096904755, "learning_rate": 1.7278011818047588e-05, "loss": 0.4882, "step": 3312 }, { "epoch": 0.739014053089449, "grad_norm": 0.16242891550064087, "learning_rate": 1.7276397662555685e-05, "loss": 0.5139, "step": 3313 }, { "epoch": 0.7392371180013384, "grad_norm": 0.16696135699748993, "learning_rate": 1.7274783104051112e-05, "loss": 0.521, "step": 3314 }, { "epoch": 0.7394601829132278, "grad_norm": 0.1544819325208664, "learning_rate": 1.727316814262329e-05, "loss": 0.4765, "step": 3315 }, { "epoch": 0.7396832478251171, "grad_norm": 0.16446231305599213, "learning_rate": 1.727155277836167e-05, "loss": 0.4683, "step": 3316 }, { "epoch": 0.7399063127370065, "grad_norm": 0.1666785031557083, "learning_rate": 1.7269937011355713e-05, "loss": 0.503, "step": 3317 }, { "epoch": 0.7401293776488959, "grad_norm": 0.16166886687278748, "learning_rate": 1.7268320841694915e-05, "loss": 0.5014, "step": 3318 }, { "epoch": 0.7403524425607851, "grad_norm": 0.27264273166656494, "learning_rate": 1.7266704269468786e-05, "loss": 0.4834, "step": 3319 }, { "epoch": 0.7405755074726745, "grad_norm": 0.16175110638141632, "learning_rate": 1.7265087294766872e-05, "loss": 0.4926, "step": 3320 }, { "epoch": 0.7407985723845639, "grad_norm": 0.1768079400062561, "learning_rate": 1.726346991767872e-05, "loss": 0.4933, "step": 3321 }, { "epoch": 0.7410216372964533, "grad_norm": 0.15689828991889954, "learning_rate": 1.7261852138293918e-05, "loss": 0.4928, "step": 3322 }, { "epoch": 0.7412447022083426, "grad_norm": 0.16128939390182495, "learning_rate": 1.7260233956702062e-05, "loss": 0.4739, "step": 3323 }, { "epoch": 0.741467767120232, "grad_norm": 0.16142840683460236, "learning_rate": 1.7258615372992783e-05, "loss": 0.5123, "step": 3324 }, { "epoch": 0.7416908320321214, "grad_norm": 0.15940620005130768, "learning_rate": 1.7256996387255725e-05, "loss": 0.5013, "step": 3325 }, { "epoch": 0.7419138969440107, "grad_norm": 0.16134943068027496, "learning_rate": 1.7255376999580557e-05, "loss": 0.4651, "step": 3326 }, { "epoch": 0.7421369618559001, "grad_norm": 0.15544810891151428, "learning_rate": 1.7253757210056978e-05, "loss": 0.4657, "step": 3327 }, { "epoch": 0.7423600267677894, "grad_norm": 0.15278422832489014, "learning_rate": 1.7252137018774694e-05, "loss": 0.4604, "step": 3328 }, { "epoch": 0.7425830916796788, "grad_norm": 0.15711228549480438, "learning_rate": 1.7250516425823443e-05, "loss": 0.4903, "step": 3329 }, { "epoch": 0.7428061565915681, "grad_norm": 0.15838393568992615, "learning_rate": 1.7248895431292988e-05, "loss": 0.484, "step": 3330 }, { "epoch": 0.7430292215034575, "grad_norm": 0.15732166171073914, "learning_rate": 1.72472740352731e-05, "loss": 0.4637, "step": 3331 }, { "epoch": 0.7432522864153469, "grad_norm": 0.16680875420570374, "learning_rate": 1.7245652237853593e-05, "loss": 0.4999, "step": 3332 }, { "epoch": 0.7434753513272362, "grad_norm": 0.16169287264347076, "learning_rate": 1.7244030039124287e-05, "loss": 0.4829, "step": 3333 }, { "epoch": 0.7436984162391256, "grad_norm": 0.15638568997383118, "learning_rate": 1.7242407439175035e-05, "loss": 0.4731, "step": 3334 }, { "epoch": 0.743921481151015, "grad_norm": 0.1748877912759781, "learning_rate": 1.72407844380957e-05, "loss": 0.4979, "step": 3335 }, { "epoch": 0.7441445460629043, "grad_norm": 0.15920549631118774, "learning_rate": 1.7239161035976175e-05, "loss": 0.5024, "step": 3336 }, { "epoch": 0.7443676109747936, "grad_norm": 0.16807498037815094, "learning_rate": 1.7237537232906376e-05, "loss": 0.5414, "step": 3337 }, { "epoch": 0.744590675886683, "grad_norm": 0.16192768514156342, "learning_rate": 1.723591302897624e-05, "loss": 0.4883, "step": 3338 }, { "epoch": 0.7448137407985724, "grad_norm": 0.15893089771270752, "learning_rate": 1.7234288424275726e-05, "loss": 0.5002, "step": 3339 }, { "epoch": 0.7450368057104617, "grad_norm": 0.1622776985168457, "learning_rate": 1.7232663418894812e-05, "loss": 0.4859, "step": 3340 }, { "epoch": 0.7452598706223511, "grad_norm": 0.1694253832101822, "learning_rate": 1.72310380129235e-05, "loss": 0.4903, "step": 3341 }, { "epoch": 0.7454829355342405, "grad_norm": 0.1649332493543625, "learning_rate": 1.722941220645182e-05, "loss": 0.5145, "step": 3342 }, { "epoch": 0.7457060004461298, "grad_norm": 0.19938278198242188, "learning_rate": 1.722778599956982e-05, "loss": 0.4949, "step": 3343 }, { "epoch": 0.7459290653580192, "grad_norm": 0.15563727915287018, "learning_rate": 1.7226159392367564e-05, "loss": 0.4911, "step": 3344 }, { "epoch": 0.7461521302699086, "grad_norm": 0.15972435474395752, "learning_rate": 1.7224532384935148e-05, "loss": 0.4748, "step": 3345 }, { "epoch": 0.746375195181798, "grad_norm": 0.16170111298561096, "learning_rate": 1.722290497736268e-05, "loss": 0.4701, "step": 3346 }, { "epoch": 0.7465982600936872, "grad_norm": 0.1537921279668808, "learning_rate": 1.7221277169740305e-05, "loss": 0.478, "step": 3347 }, { "epoch": 0.7468213250055766, "grad_norm": 0.15656088292598724, "learning_rate": 1.7219648962158174e-05, "loss": 0.4851, "step": 3348 }, { "epoch": 0.747044389917466, "grad_norm": 0.16441093385219574, "learning_rate": 1.7218020354706473e-05, "loss": 0.4846, "step": 3349 }, { "epoch": 0.7472674548293553, "grad_norm": 0.17135939002037048, "learning_rate": 1.72163913474754e-05, "loss": 0.5044, "step": 3350 }, { "epoch": 0.7474905197412447, "grad_norm": 0.17070244252681732, "learning_rate": 1.721476194055518e-05, "loss": 0.537, "step": 3351 }, { "epoch": 0.7477135846531341, "grad_norm": 0.1651678830385208, "learning_rate": 1.7213132134036063e-05, "loss": 0.5161, "step": 3352 }, { "epoch": 0.7479366495650234, "grad_norm": 0.16997648775577545, "learning_rate": 1.7211501928008317e-05, "loss": 0.5026, "step": 3353 }, { "epoch": 0.7481597144769128, "grad_norm": 0.17357714474201202, "learning_rate": 1.7209871322562232e-05, "loss": 0.4899, "step": 3354 }, { "epoch": 0.7483827793888022, "grad_norm": 0.16555210947990417, "learning_rate": 1.7208240317788115e-05, "loss": 0.5176, "step": 3355 }, { "epoch": 0.7486058443006915, "grad_norm": 0.15799009799957275, "learning_rate": 1.7206608913776315e-05, "loss": 0.4923, "step": 3356 }, { "epoch": 0.7488289092125808, "grad_norm": 0.15853382647037506, "learning_rate": 1.720497711061718e-05, "loss": 0.4881, "step": 3357 }, { "epoch": 0.7490519741244702, "grad_norm": 0.15760891139507294, "learning_rate": 1.720334490840109e-05, "loss": 0.5008, "step": 3358 }, { "epoch": 0.7492750390363596, "grad_norm": 0.16732452809810638, "learning_rate": 1.720171230721845e-05, "loss": 0.4923, "step": 3359 }, { "epoch": 0.7494981039482489, "grad_norm": 0.17124591767787933, "learning_rate": 1.7200079307159677e-05, "loss": 0.5272, "step": 3360 }, { "epoch": 0.7497211688601383, "grad_norm": 0.16751034557819366, "learning_rate": 1.7198445908315226e-05, "loss": 0.4801, "step": 3361 }, { "epoch": 0.7499442337720277, "grad_norm": 0.16137580573558807, "learning_rate": 1.719681211077556e-05, "loss": 0.5371, "step": 3362 }, { "epoch": 0.7501672986839171, "grad_norm": 0.16350311040878296, "learning_rate": 1.7195177914631172e-05, "loss": 0.5152, "step": 3363 }, { "epoch": 0.7503903635958064, "grad_norm": 0.1560794562101364, "learning_rate": 1.719354331997257e-05, "loss": 0.4788, "step": 3364 }, { "epoch": 0.7506134285076957, "grad_norm": 0.16271278262138367, "learning_rate": 1.7191908326890288e-05, "loss": 0.4975, "step": 3365 }, { "epoch": 0.7508364934195851, "grad_norm": 0.21892762184143066, "learning_rate": 1.7190272935474883e-05, "loss": 0.511, "step": 3366 }, { "epoch": 0.7510595583314744, "grad_norm": 0.1879514753818512, "learning_rate": 1.7188637145816937e-05, "loss": 0.5277, "step": 3367 }, { "epoch": 0.7512826232433638, "grad_norm": 0.17167915403842926, "learning_rate": 1.718700095800705e-05, "loss": 0.5128, "step": 3368 }, { "epoch": 0.7515056881552532, "grad_norm": 0.16950075328350067, "learning_rate": 1.718536437213584e-05, "loss": 0.5175, "step": 3369 }, { "epoch": 0.7517287530671425, "grad_norm": 0.1571718156337738, "learning_rate": 1.718372738829395e-05, "loss": 0.489, "step": 3370 }, { "epoch": 0.7519518179790319, "grad_norm": 0.161586731672287, "learning_rate": 1.718209000657205e-05, "loss": 0.5112, "step": 3371 }, { "epoch": 0.7521748828909213, "grad_norm": 0.16906411945819855, "learning_rate": 1.718045222706083e-05, "loss": 0.5171, "step": 3372 }, { "epoch": 0.7523979478028107, "grad_norm": 0.17176932096481323, "learning_rate": 1.7178814049851e-05, "loss": 0.5352, "step": 3373 }, { "epoch": 0.7526210127146999, "grad_norm": 0.15188109874725342, "learning_rate": 1.717717547503329e-05, "loss": 0.4857, "step": 3374 }, { "epoch": 0.7528440776265893, "grad_norm": 0.1867418736219406, "learning_rate": 1.7175536502698456e-05, "loss": 0.5199, "step": 3375 }, { "epoch": 0.7530671425384787, "grad_norm": 0.1905105859041214, "learning_rate": 1.7173897132937274e-05, "loss": 0.5077, "step": 3376 }, { "epoch": 0.753290207450368, "grad_norm": 0.16345377266407013, "learning_rate": 1.7172257365840544e-05, "loss": 0.5362, "step": 3377 }, { "epoch": 0.7535132723622574, "grad_norm": 0.17045968770980835, "learning_rate": 1.7170617201499083e-05, "loss": 0.5057, "step": 3378 }, { "epoch": 0.7537363372741468, "grad_norm": 0.16535595059394836, "learning_rate": 1.716897664000374e-05, "loss": 0.5141, "step": 3379 }, { "epoch": 0.7539594021860362, "grad_norm": 0.16590653359889984, "learning_rate": 1.716733568144538e-05, "loss": 0.5146, "step": 3380 }, { "epoch": 0.7541824670979255, "grad_norm": 0.156137153506279, "learning_rate": 1.716569432591488e-05, "loss": 0.4984, "step": 3381 }, { "epoch": 0.7544055320098149, "grad_norm": 0.17076677083969116, "learning_rate": 1.7164052573503155e-05, "loss": 0.4968, "step": 3382 }, { "epoch": 0.7546285969217043, "grad_norm": 0.15686576068401337, "learning_rate": 1.7162410424301132e-05, "loss": 0.4868, "step": 3383 }, { "epoch": 0.7548516618335935, "grad_norm": 0.19442743062973022, "learning_rate": 1.716076787839977e-05, "loss": 0.5058, "step": 3384 }, { "epoch": 0.7550747267454829, "grad_norm": 0.16424842178821564, "learning_rate": 1.715912493589004e-05, "loss": 0.4857, "step": 3385 }, { "epoch": 0.7552977916573723, "grad_norm": 0.6959996819496155, "learning_rate": 1.7157481596862936e-05, "loss": 0.5091, "step": 3386 }, { "epoch": 0.7555208565692616, "grad_norm": 0.17369645833969116, "learning_rate": 1.7155837861409482e-05, "loss": 0.5116, "step": 3387 }, { "epoch": 0.755743921481151, "grad_norm": 0.2050577700138092, "learning_rate": 1.7154193729620713e-05, "loss": 0.4982, "step": 3388 }, { "epoch": 0.7559669863930404, "grad_norm": 0.15558819472789764, "learning_rate": 1.7152549201587695e-05, "loss": 0.4873, "step": 3389 }, { "epoch": 0.7561900513049298, "grad_norm": 0.16767403483390808, "learning_rate": 1.715090427740151e-05, "loss": 0.4806, "step": 3390 }, { "epoch": 0.7564131162168191, "grad_norm": 0.1653711050748825, "learning_rate": 1.714925895715326e-05, "loss": 0.4858, "step": 3391 }, { "epoch": 0.7566361811287085, "grad_norm": 0.1628393530845642, "learning_rate": 1.7147613240934087e-05, "loss": 0.5036, "step": 3392 }, { "epoch": 0.7568592460405978, "grad_norm": 0.1690330058336258, "learning_rate": 1.714596712883513e-05, "loss": 0.4788, "step": 3393 }, { "epoch": 0.7570823109524871, "grad_norm": 0.18115870654582977, "learning_rate": 1.714432062094756e-05, "loss": 0.5009, "step": 3394 }, { "epoch": 0.7573053758643765, "grad_norm": 0.17084982991218567, "learning_rate": 1.7142673717362578e-05, "loss": 0.5281, "step": 3395 }, { "epoch": 0.7575284407762659, "grad_norm": 0.16751186549663544, "learning_rate": 1.7141026418171396e-05, "loss": 0.5377, "step": 3396 }, { "epoch": 0.7577515056881553, "grad_norm": 0.16510246694087982, "learning_rate": 1.713937872346525e-05, "loss": 0.4995, "step": 3397 }, { "epoch": 0.7579745706000446, "grad_norm": 0.1689106822013855, "learning_rate": 1.7137730633335404e-05, "loss": 0.4891, "step": 3398 }, { "epoch": 0.758197635511934, "grad_norm": 0.16398201882839203, "learning_rate": 1.7136082147873136e-05, "loss": 0.5013, "step": 3399 }, { "epoch": 0.7584207004238234, "grad_norm": 0.16415871679782867, "learning_rate": 1.713443326716975e-05, "loss": 0.5343, "step": 3400 }, { "epoch": 0.7586437653357126, "grad_norm": 0.164589986205101, "learning_rate": 1.7132783991316577e-05, "loss": 0.4897, "step": 3401 }, { "epoch": 0.758866830247602, "grad_norm": 0.17511430382728577, "learning_rate": 1.7131134320404953e-05, "loss": 0.5088, "step": 3402 }, { "epoch": 0.7590898951594914, "grad_norm": 0.1833367496728897, "learning_rate": 1.7129484254526257e-05, "loss": 0.4811, "step": 3403 }, { "epoch": 0.7593129600713808, "grad_norm": 0.16160368919372559, "learning_rate": 1.7127833793771874e-05, "loss": 0.508, "step": 3404 }, { "epoch": 0.7595360249832701, "grad_norm": 0.1855868101119995, "learning_rate": 1.7126182938233228e-05, "loss": 0.5091, "step": 3405 }, { "epoch": 0.7597590898951595, "grad_norm": 0.16717424988746643, "learning_rate": 1.7124531688001735e-05, "loss": 0.482, "step": 3406 }, { "epoch": 0.7599821548070489, "grad_norm": 0.1635364592075348, "learning_rate": 1.7122880043168872e-05, "loss": 0.4706, "step": 3407 }, { "epoch": 0.7602052197189382, "grad_norm": 0.15802796185016632, "learning_rate": 1.71212280038261e-05, "loss": 0.4835, "step": 3408 }, { "epoch": 0.7604282846308276, "grad_norm": 0.16960306465625763, "learning_rate": 1.7119575570064926e-05, "loss": 0.474, "step": 3409 }, { "epoch": 0.760651349542717, "grad_norm": 0.16367004811763763, "learning_rate": 1.7117922741976878e-05, "loss": 0.5132, "step": 3410 }, { "epoch": 0.7608744144546062, "grad_norm": 0.1700059324502945, "learning_rate": 1.7116269519653493e-05, "loss": 0.5124, "step": 3411 }, { "epoch": 0.7610974793664956, "grad_norm": 0.19023141264915466, "learning_rate": 1.711461590318634e-05, "loss": 0.487, "step": 3412 }, { "epoch": 0.761320544278385, "grad_norm": 0.16813622415065765, "learning_rate": 1.7112961892667003e-05, "loss": 0.4749, "step": 3413 }, { "epoch": 0.7615436091902744, "grad_norm": 0.16135910153388977, "learning_rate": 1.7111307488187096e-05, "loss": 0.5041, "step": 3414 }, { "epoch": 0.7617666741021637, "grad_norm": 0.16579851508140564, "learning_rate": 1.710965268983825e-05, "loss": 0.5062, "step": 3415 }, { "epoch": 0.7619897390140531, "grad_norm": 0.15735271573066711, "learning_rate": 1.7107997497712113e-05, "loss": 0.5071, "step": 3416 }, { "epoch": 0.7622128039259425, "grad_norm": 0.1686030477285385, "learning_rate": 1.7106341911900365e-05, "loss": 0.5271, "step": 3417 }, { "epoch": 0.7624358688378318, "grad_norm": 0.17071577906608582, "learning_rate": 1.7104685932494704e-05, "loss": 0.5179, "step": 3418 }, { "epoch": 0.7626589337497212, "grad_norm": 0.15663312375545502, "learning_rate": 1.7103029559586843e-05, "loss": 0.4928, "step": 3419 }, { "epoch": 0.7628819986616105, "grad_norm": 0.17715679109096527, "learning_rate": 1.7101372793268526e-05, "loss": 0.5371, "step": 3420 }, { "epoch": 0.7631050635734999, "grad_norm": 0.1651579737663269, "learning_rate": 1.709971563363151e-05, "loss": 0.5072, "step": 3421 }, { "epoch": 0.7633281284853892, "grad_norm": 0.15921546518802643, "learning_rate": 1.7098058080767587e-05, "loss": 0.5008, "step": 3422 }, { "epoch": 0.7635511933972786, "grad_norm": 0.17586684226989746, "learning_rate": 1.709640013476856e-05, "loss": 0.4962, "step": 3423 }, { "epoch": 0.763774258309168, "grad_norm": 0.17664553225040436, "learning_rate": 1.7094741795726254e-05, "loss": 0.5107, "step": 3424 }, { "epoch": 0.7639973232210573, "grad_norm": 0.1621769219636917, "learning_rate": 1.7093083063732518e-05, "loss": 0.5017, "step": 3425 }, { "epoch": 0.7642203881329467, "grad_norm": 0.1619473695755005, "learning_rate": 1.7091423938879227e-05, "loss": 0.4858, "step": 3426 }, { "epoch": 0.7644434530448361, "grad_norm": 0.1741994023323059, "learning_rate": 1.7089764421258272e-05, "loss": 0.5226, "step": 3427 }, { "epoch": 0.7646665179567254, "grad_norm": 0.16964323818683624, "learning_rate": 1.7088104510961564e-05, "loss": 0.4987, "step": 3428 }, { "epoch": 0.7648895828686147, "grad_norm": 0.1797408014535904, "learning_rate": 1.7086444208081047e-05, "loss": 0.4833, "step": 3429 }, { "epoch": 0.7651126477805041, "grad_norm": 0.19941550493240356, "learning_rate": 1.708478351270867e-05, "loss": 0.5138, "step": 3430 }, { "epoch": 0.7653357126923935, "grad_norm": 0.17348411679267883, "learning_rate": 1.708312242493642e-05, "loss": 0.4866, "step": 3431 }, { "epoch": 0.7655587776042828, "grad_norm": 0.1664038747549057, "learning_rate": 1.7081460944856294e-05, "loss": 0.5124, "step": 3432 }, { "epoch": 0.7657818425161722, "grad_norm": 0.15917468070983887, "learning_rate": 1.7079799072560318e-05, "loss": 0.5066, "step": 3433 }, { "epoch": 0.7660049074280616, "grad_norm": 0.16369706392288208, "learning_rate": 1.7078136808140532e-05, "loss": 0.4903, "step": 3434 }, { "epoch": 0.7662279723399509, "grad_norm": 0.16175441443920135, "learning_rate": 1.707647415168901e-05, "loss": 0.525, "step": 3435 }, { "epoch": 0.7664510372518403, "grad_norm": 0.19323702156543732, "learning_rate": 1.707481110329783e-05, "loss": 0.4865, "step": 3436 }, { "epoch": 0.7666741021637297, "grad_norm": 0.15811483561992645, "learning_rate": 1.707314766305912e-05, "loss": 0.4709, "step": 3437 }, { "epoch": 0.7668971670756191, "grad_norm": 0.1613859087228775, "learning_rate": 1.707148383106499e-05, "loss": 0.4969, "step": 3438 }, { "epoch": 0.7671202319875083, "grad_norm": 0.16382627189159393, "learning_rate": 1.706981960740761e-05, "loss": 0.4893, "step": 3439 }, { "epoch": 0.7673432968993977, "grad_norm": 0.17395590245723724, "learning_rate": 1.706815499217915e-05, "loss": 0.5073, "step": 3440 }, { "epoch": 0.7675663618112871, "grad_norm": 0.16161273419857025, "learning_rate": 1.7066489985471802e-05, "loss": 0.4766, "step": 3441 }, { "epoch": 0.7677894267231764, "grad_norm": 0.16916361451148987, "learning_rate": 1.706482458737779e-05, "loss": 0.5102, "step": 3442 }, { "epoch": 0.7680124916350658, "grad_norm": 0.16515444219112396, "learning_rate": 1.7063158797989355e-05, "loss": 0.5006, "step": 3443 }, { "epoch": 0.7682355565469552, "grad_norm": 0.16712456941604614, "learning_rate": 1.7061492617398755e-05, "loss": 0.4757, "step": 3444 }, { "epoch": 0.7684586214588445, "grad_norm": 0.1649584323167801, "learning_rate": 1.7059826045698275e-05, "loss": 0.4979, "step": 3445 }, { "epoch": 0.7686816863707339, "grad_norm": 0.16432853043079376, "learning_rate": 1.7058159082980223e-05, "loss": 0.5155, "step": 3446 }, { "epoch": 0.7689047512826233, "grad_norm": 0.15966151654720306, "learning_rate": 1.7056491729336917e-05, "loss": 0.4908, "step": 3447 }, { "epoch": 0.7691278161945126, "grad_norm": 0.1597587913274765, "learning_rate": 1.7054823984860716e-05, "loss": 0.4912, "step": 3448 }, { "epoch": 0.7693508811064019, "grad_norm": 0.16364127397537231, "learning_rate": 1.705315584964399e-05, "loss": 0.5045, "step": 3449 }, { "epoch": 0.7695739460182913, "grad_norm": 0.15808305144309998, "learning_rate": 1.7051487323779122e-05, "loss": 0.5025, "step": 3450 }, { "epoch": 0.7697970109301807, "grad_norm": 0.173845574259758, "learning_rate": 1.704981840735853e-05, "loss": 0.5166, "step": 3451 }, { "epoch": 0.77002007584207, "grad_norm": 0.1586831957101822, "learning_rate": 1.7048149100474653e-05, "loss": 0.4758, "step": 3452 }, { "epoch": 0.7702431407539594, "grad_norm": 0.16501812636852264, "learning_rate": 1.704647940321994e-05, "loss": 0.5051, "step": 3453 }, { "epoch": 0.7704662056658488, "grad_norm": 0.17648212611675262, "learning_rate": 1.704480931568688e-05, "loss": 0.5014, "step": 3454 }, { "epoch": 0.7706892705777382, "grad_norm": 0.15847481787204742, "learning_rate": 1.704313883796796e-05, "loss": 0.5246, "step": 3455 }, { "epoch": 0.7709123354896275, "grad_norm": 0.17660297453403473, "learning_rate": 1.704146797015571e-05, "loss": 0.5028, "step": 3456 }, { "epoch": 0.7711354004015168, "grad_norm": 0.1632460057735443, "learning_rate": 1.7039796712342672e-05, "loss": 0.4647, "step": 3457 }, { "epoch": 0.7713584653134062, "grad_norm": 0.15266932547092438, "learning_rate": 1.7038125064621408e-05, "loss": 0.4829, "step": 3458 }, { "epoch": 0.7715815302252955, "grad_norm": 0.16995325684547424, "learning_rate": 1.703645302708451e-05, "loss": 0.5057, "step": 3459 }, { "epoch": 0.7718045951371849, "grad_norm": 0.15925444662570953, "learning_rate": 1.703478059982458e-05, "loss": 0.5128, "step": 3460 }, { "epoch": 0.7720276600490743, "grad_norm": 0.17199598252773285, "learning_rate": 1.703310778293425e-05, "loss": 0.5088, "step": 3461 }, { "epoch": 0.7722507249609636, "grad_norm": 0.17313992977142334, "learning_rate": 1.7031434576506173e-05, "loss": 0.4796, "step": 3462 }, { "epoch": 0.772473789872853, "grad_norm": 0.1665591597557068, "learning_rate": 1.7029760980633016e-05, "loss": 0.4978, "step": 3463 }, { "epoch": 0.7726968547847424, "grad_norm": 0.16291576623916626, "learning_rate": 1.7028086995407477e-05, "loss": 0.4892, "step": 3464 }, { "epoch": 0.7729199196966318, "grad_norm": 0.15629801154136658, "learning_rate": 1.7026412620922276e-05, "loss": 0.483, "step": 3465 }, { "epoch": 0.773142984608521, "grad_norm": 0.16080623865127563, "learning_rate": 1.702473785727014e-05, "loss": 0.4968, "step": 3466 }, { "epoch": 0.7733660495204104, "grad_norm": 0.16567450761795044, "learning_rate": 1.702306270454384e-05, "loss": 0.5023, "step": 3467 }, { "epoch": 0.7735891144322998, "grad_norm": 0.16630107164382935, "learning_rate": 1.702138716283615e-05, "loss": 0.4554, "step": 3468 }, { "epoch": 0.7738121793441891, "grad_norm": 0.1635141223669052, "learning_rate": 1.7019711232239872e-05, "loss": 0.4947, "step": 3469 }, { "epoch": 0.7740352442560785, "grad_norm": 0.16662491858005524, "learning_rate": 1.7018034912847826e-05, "loss": 0.4947, "step": 3470 }, { "epoch": 0.7742583091679679, "grad_norm": 0.16234828531742096, "learning_rate": 1.7016358204752865e-05, "loss": 0.5424, "step": 3471 }, { "epoch": 0.7744813740798573, "grad_norm": 0.16206470131874084, "learning_rate": 1.701468110804785e-05, "loss": 0.4761, "step": 3472 }, { "epoch": 0.7747044389917466, "grad_norm": 0.1729600876569748, "learning_rate": 1.7013003622825674e-05, "loss": 0.4867, "step": 3473 }, { "epoch": 0.774927503903636, "grad_norm": 0.15577882528305054, "learning_rate": 1.7011325749179245e-05, "loss": 0.4935, "step": 3474 }, { "epoch": 0.7751505688155254, "grad_norm": 0.16553224623203278, "learning_rate": 1.7009647487201492e-05, "loss": 0.5052, "step": 3475 }, { "epoch": 0.7753736337274146, "grad_norm": 0.16988618671894073, "learning_rate": 1.700796883698536e-05, "loss": 0.4999, "step": 3476 }, { "epoch": 0.775596698639304, "grad_norm": 0.15535961091518402, "learning_rate": 1.7006289798623842e-05, "loss": 0.494, "step": 3477 }, { "epoch": 0.7758197635511934, "grad_norm": 0.16982175409793854, "learning_rate": 1.700461037220992e-05, "loss": 0.5057, "step": 3478 }, { "epoch": 0.7760428284630828, "grad_norm": 0.17564809322357178, "learning_rate": 1.7002930557836615e-05, "loss": 0.5088, "step": 3479 }, { "epoch": 0.7762658933749721, "grad_norm": 0.16570429503917694, "learning_rate": 1.7001250355596967e-05, "loss": 0.5223, "step": 3480 }, { "epoch": 0.7764889582868615, "grad_norm": 0.16374748945236206, "learning_rate": 1.6999569765584035e-05, "loss": 0.4533, "step": 3481 }, { "epoch": 0.7767120231987509, "grad_norm": 0.15578052401542664, "learning_rate": 1.69978887878909e-05, "loss": 0.4797, "step": 3482 }, { "epoch": 0.7769350881106402, "grad_norm": 0.15627166628837585, "learning_rate": 1.6996207422610664e-05, "loss": 0.4523, "step": 3483 }, { "epoch": 0.7771581530225296, "grad_norm": 0.1620151549577713, "learning_rate": 1.6994525669836453e-05, "loss": 0.4999, "step": 3484 }, { "epoch": 0.7773812179344189, "grad_norm": 0.15806102752685547, "learning_rate": 1.6992843529661413e-05, "loss": 0.5013, "step": 3485 }, { "epoch": 0.7776042828463082, "grad_norm": 0.25901806354522705, "learning_rate": 1.6991161002178712e-05, "loss": 0.5003, "step": 3486 }, { "epoch": 0.7778273477581976, "grad_norm": 0.15611301362514496, "learning_rate": 1.698947808748154e-05, "loss": 0.5079, "step": 3487 }, { "epoch": 0.778050412670087, "grad_norm": 0.1656789630651474, "learning_rate": 1.6987794785663107e-05, "loss": 0.4983, "step": 3488 }, { "epoch": 0.7782734775819764, "grad_norm": 0.16347582638263702, "learning_rate": 1.698611109681664e-05, "loss": 0.4887, "step": 3489 }, { "epoch": 0.7784965424938657, "grad_norm": 0.15885215997695923, "learning_rate": 1.69844270210354e-05, "loss": 0.4837, "step": 3490 }, { "epoch": 0.7787196074057551, "grad_norm": 0.17082704603672028, "learning_rate": 1.698274255841265e-05, "loss": 0.5165, "step": 3491 }, { "epoch": 0.7789426723176445, "grad_norm": 0.16731634736061096, "learning_rate": 1.6981057709041703e-05, "loss": 0.485, "step": 3492 }, { "epoch": 0.7791657372295338, "grad_norm": 0.16583065688610077, "learning_rate": 1.697937247301586e-05, "loss": 0.5117, "step": 3493 }, { "epoch": 0.7793888021414231, "grad_norm": 0.16518862545490265, "learning_rate": 1.6977686850428475e-05, "loss": 0.5141, "step": 3494 }, { "epoch": 0.7796118670533125, "grad_norm": 0.15372084081172943, "learning_rate": 1.69760008413729e-05, "loss": 0.4787, "step": 3495 }, { "epoch": 0.7798349319652019, "grad_norm": 0.16989074647426605, "learning_rate": 1.6974314445942514e-05, "loss": 0.4906, "step": 3496 }, { "epoch": 0.7800579968770912, "grad_norm": 0.16484175622463226, "learning_rate": 1.697262766423072e-05, "loss": 0.5, "step": 3497 }, { "epoch": 0.7802810617889806, "grad_norm": 0.16868092119693756, "learning_rate": 1.6970940496330953e-05, "loss": 0.5106, "step": 3498 }, { "epoch": 0.78050412670087, "grad_norm": 0.16630426049232483, "learning_rate": 1.6969252942336648e-05, "loss": 0.505, "step": 3499 }, { "epoch": 0.7807271916127593, "grad_norm": 0.15219101309776306, "learning_rate": 1.696756500234128e-05, "loss": 0.4698, "step": 3500 }, { "epoch": 0.7809502565246487, "grad_norm": 0.16838675737380981, "learning_rate": 1.6965876676438334e-05, "loss": 0.4897, "step": 3501 }, { "epoch": 0.7811733214365381, "grad_norm": 0.15935415029525757, "learning_rate": 1.696418796472132e-05, "loss": 0.508, "step": 3502 }, { "epoch": 0.7813963863484273, "grad_norm": 0.16184543073177338, "learning_rate": 1.696249886728377e-05, "loss": 0.5083, "step": 3503 }, { "epoch": 0.7816194512603167, "grad_norm": 0.15746904909610748, "learning_rate": 1.6960809384219237e-05, "loss": 0.4662, "step": 3504 }, { "epoch": 0.7818425161722061, "grad_norm": 0.1662943959236145, "learning_rate": 1.6959119515621295e-05, "loss": 0.4853, "step": 3505 }, { "epoch": 0.7820655810840955, "grad_norm": 0.16866953670978546, "learning_rate": 1.695742926158354e-05, "loss": 0.4919, "step": 3506 }, { "epoch": 0.7822886459959848, "grad_norm": 0.1712365448474884, "learning_rate": 1.695573862219959e-05, "loss": 0.4882, "step": 3507 }, { "epoch": 0.7825117109078742, "grad_norm": 0.17081387341022491, "learning_rate": 1.6954047597563078e-05, "loss": 0.5068, "step": 3508 }, { "epoch": 0.7827347758197636, "grad_norm": 0.16168633103370667, "learning_rate": 1.695235618776767e-05, "loss": 0.5119, "step": 3509 }, { "epoch": 0.7829578407316529, "grad_norm": 0.2024863213300705, "learning_rate": 1.6950664392907042e-05, "loss": 0.5039, "step": 3510 }, { "epoch": 0.7831809056435423, "grad_norm": 0.1648394763469696, "learning_rate": 1.6948972213074902e-05, "loss": 0.5304, "step": 3511 }, { "epoch": 0.7834039705554317, "grad_norm": 0.15931276977062225, "learning_rate": 1.6947279648364966e-05, "loss": 0.5197, "step": 3512 }, { "epoch": 0.783627035467321, "grad_norm": 0.15827037394046783, "learning_rate": 1.6945586698870985e-05, "loss": 0.513, "step": 3513 }, { "epoch": 0.7838501003792103, "grad_norm": 0.15963312983512878, "learning_rate": 1.694389336468672e-05, "loss": 0.5096, "step": 3514 }, { "epoch": 0.7840731652910997, "grad_norm": 0.1624821275472641, "learning_rate": 1.694219964590597e-05, "loss": 0.5086, "step": 3515 }, { "epoch": 0.7842962302029891, "grad_norm": 0.15603917837142944, "learning_rate": 1.694050554262253e-05, "loss": 0.499, "step": 3516 }, { "epoch": 0.7845192951148784, "grad_norm": 0.16541822254657745, "learning_rate": 1.6938811054930237e-05, "loss": 0.4843, "step": 3517 }, { "epoch": 0.7847423600267678, "grad_norm": 0.16056667268276215, "learning_rate": 1.693711618292294e-05, "loss": 0.4992, "step": 3518 }, { "epoch": 0.7849654249386572, "grad_norm": 0.15387259423732758, "learning_rate": 1.693542092669451e-05, "loss": 0.5056, "step": 3519 }, { "epoch": 0.7851884898505465, "grad_norm": 0.15904875099658966, "learning_rate": 1.6933725286338846e-05, "loss": 0.4718, "step": 3520 }, { "epoch": 0.7854115547624358, "grad_norm": 0.16445663571357727, "learning_rate": 1.693202926194986e-05, "loss": 0.4752, "step": 3521 }, { "epoch": 0.7856346196743252, "grad_norm": 0.16833317279815674, "learning_rate": 1.693033285362149e-05, "loss": 0.4626, "step": 3522 }, { "epoch": 0.7858576845862146, "grad_norm": 0.17628587782382965, "learning_rate": 1.692863606144769e-05, "loss": 0.5001, "step": 3523 }, { "epoch": 0.7860807494981039, "grad_norm": 0.16575254499912262, "learning_rate": 1.692693888552245e-05, "loss": 0.4987, "step": 3524 }, { "epoch": 0.7863038144099933, "grad_norm": 0.1609877198934555, "learning_rate": 1.6925241325939756e-05, "loss": 0.4897, "step": 3525 }, { "epoch": 0.7865268793218827, "grad_norm": 0.1630864441394806, "learning_rate": 1.6923543382793636e-05, "loss": 0.473, "step": 3526 }, { "epoch": 0.786749944233772, "grad_norm": 0.16511790454387665, "learning_rate": 1.6921845056178133e-05, "loss": 0.511, "step": 3527 }, { "epoch": 0.7869730091456614, "grad_norm": 0.15955112874507904, "learning_rate": 1.6920146346187312e-05, "loss": 0.4702, "step": 3528 }, { "epoch": 0.7871960740575508, "grad_norm": 0.16080255806446075, "learning_rate": 1.691844725291526e-05, "loss": 0.4897, "step": 3529 }, { "epoch": 0.7874191389694402, "grad_norm": 0.15729734301567078, "learning_rate": 1.6916747776456074e-05, "loss": 0.477, "step": 3530 }, { "epoch": 0.7876422038813294, "grad_norm": 0.18572328984737396, "learning_rate": 1.691504791690389e-05, "loss": 0.4953, "step": 3531 }, { "epoch": 0.7878652687932188, "grad_norm": 0.17920143902301788, "learning_rate": 1.6913347674352855e-05, "loss": 0.4932, "step": 3532 }, { "epoch": 0.7880883337051082, "grad_norm": 0.14651532471179962, "learning_rate": 1.691164704889714e-05, "loss": 0.4531, "step": 3533 }, { "epoch": 0.7883113986169975, "grad_norm": 0.16487324237823486, "learning_rate": 1.6909946040630935e-05, "loss": 0.487, "step": 3534 }, { "epoch": 0.7885344635288869, "grad_norm": 0.16055698692798615, "learning_rate": 1.6908244649648455e-05, "loss": 0.4754, "step": 3535 }, { "epoch": 0.7887575284407763, "grad_norm": 0.1739615648984909, "learning_rate": 1.690654287604393e-05, "loss": 0.5076, "step": 3536 }, { "epoch": 0.7889805933526656, "grad_norm": 0.17668306827545166, "learning_rate": 1.690484071991162e-05, "loss": 0.5177, "step": 3537 }, { "epoch": 0.789203658264555, "grad_norm": 0.17033684253692627, "learning_rate": 1.690313818134579e-05, "loss": 0.4884, "step": 3538 }, { "epoch": 0.7894267231764444, "grad_norm": 0.19986379146575928, "learning_rate": 1.690143526044075e-05, "loss": 0.4885, "step": 3539 }, { "epoch": 0.7896497880883337, "grad_norm": 0.17267881333827972, "learning_rate": 1.6899731957290814e-05, "loss": 0.498, "step": 3540 }, { "epoch": 0.789872853000223, "grad_norm": 0.15866464376449585, "learning_rate": 1.689802827199032e-05, "loss": 0.4856, "step": 3541 }, { "epoch": 0.7900959179121124, "grad_norm": 0.17956510186195374, "learning_rate": 1.689632420463363e-05, "loss": 0.5046, "step": 3542 }, { "epoch": 0.7903189828240018, "grad_norm": 0.17033924162387848, "learning_rate": 1.6894619755315127e-05, "loss": 0.5272, "step": 3543 }, { "epoch": 0.7905420477358911, "grad_norm": 0.15526439249515533, "learning_rate": 1.6892914924129212e-05, "loss": 0.4858, "step": 3544 }, { "epoch": 0.7907651126477805, "grad_norm": 0.16875073313713074, "learning_rate": 1.689120971117031e-05, "loss": 0.4957, "step": 3545 }, { "epoch": 0.7909881775596699, "grad_norm": 0.16168349981307983, "learning_rate": 1.6889504116532868e-05, "loss": 0.5041, "step": 3546 }, { "epoch": 0.7912112424715593, "grad_norm": 0.1654532253742218, "learning_rate": 1.688779814031135e-05, "loss": 0.5032, "step": 3547 }, { "epoch": 0.7914343073834486, "grad_norm": 0.16922220587730408, "learning_rate": 1.6886091782600248e-05, "loss": 0.5098, "step": 3548 }, { "epoch": 0.791657372295338, "grad_norm": 0.17434757947921753, "learning_rate": 1.6884385043494064e-05, "loss": 0.5232, "step": 3549 }, { "epoch": 0.7918804372072273, "grad_norm": 0.1557532101869583, "learning_rate": 1.688267792308733e-05, "loss": 0.4865, "step": 3550 }, { "epoch": 0.7921035021191166, "grad_norm": 0.1586635261774063, "learning_rate": 1.6880970421474604e-05, "loss": 0.4967, "step": 3551 }, { "epoch": 0.792326567031006, "grad_norm": 0.16100507974624634, "learning_rate": 1.6879262538750453e-05, "loss": 0.4804, "step": 3552 }, { "epoch": 0.7925496319428954, "grad_norm": 0.15817134082317352, "learning_rate": 1.6877554275009467e-05, "loss": 0.5066, "step": 3553 }, { "epoch": 0.7927726968547848, "grad_norm": 0.15588125586509705, "learning_rate": 1.6875845630346265e-05, "loss": 0.5074, "step": 3554 }, { "epoch": 0.7929957617666741, "grad_norm": 0.29957878589630127, "learning_rate": 1.687413660485548e-05, "loss": 0.4962, "step": 3555 }, { "epoch": 0.7932188266785635, "grad_norm": 0.15556854009628296, "learning_rate": 1.6872427198631772e-05, "loss": 0.4746, "step": 3556 }, { "epoch": 0.7934418915904529, "grad_norm": 0.1643342226743698, "learning_rate": 1.6870717411769818e-05, "loss": 0.5085, "step": 3557 }, { "epoch": 0.7936649565023421, "grad_norm": 0.16817674040794373, "learning_rate": 1.686900724436431e-05, "loss": 0.5078, "step": 3558 }, { "epoch": 0.7938880214142315, "grad_norm": 0.17203262448310852, "learning_rate": 1.6867296696509978e-05, "loss": 0.4775, "step": 3559 }, { "epoch": 0.7941110863261209, "grad_norm": 0.1592828631401062, "learning_rate": 1.6865585768301556e-05, "loss": 0.487, "step": 3560 }, { "epoch": 0.7943341512380102, "grad_norm": 0.15609320998191833, "learning_rate": 1.6863874459833806e-05, "loss": 0.4696, "step": 3561 }, { "epoch": 0.7945572161498996, "grad_norm": 0.17543616890907288, "learning_rate": 1.6862162771201515e-05, "loss": 0.5098, "step": 3562 }, { "epoch": 0.794780281061789, "grad_norm": 0.17788106203079224, "learning_rate": 1.6860450702499486e-05, "loss": 0.4826, "step": 3563 }, { "epoch": 0.7950033459736784, "grad_norm": 0.16301631927490234, "learning_rate": 1.685873825382254e-05, "loss": 0.5087, "step": 3564 }, { "epoch": 0.7952264108855677, "grad_norm": 0.16601480543613434, "learning_rate": 1.685702542526553e-05, "loss": 0.4624, "step": 3565 }, { "epoch": 0.7954494757974571, "grad_norm": 0.7922313809394836, "learning_rate": 1.6855312216923316e-05, "loss": 0.5045, "step": 3566 }, { "epoch": 0.7956725407093465, "grad_norm": 0.2716256380081177, "learning_rate": 1.685359862889079e-05, "loss": 0.5, "step": 3567 }, { "epoch": 0.7958956056212357, "grad_norm": 0.1633540540933609, "learning_rate": 1.6851884661262864e-05, "loss": 0.4944, "step": 3568 }, { "epoch": 0.7961186705331251, "grad_norm": 0.17535462975502014, "learning_rate": 1.6850170314134465e-05, "loss": 0.5251, "step": 3569 }, { "epoch": 0.7963417354450145, "grad_norm": 0.18369325995445251, "learning_rate": 1.6848455587600542e-05, "loss": 0.5043, "step": 3570 }, { "epoch": 0.7965648003569039, "grad_norm": 0.15560777485370636, "learning_rate": 1.6846740481756072e-05, "loss": 0.4686, "step": 3571 }, { "epoch": 0.7967878652687932, "grad_norm": 0.18801093101501465, "learning_rate": 1.6845024996696047e-05, "loss": 0.4946, "step": 3572 }, { "epoch": 0.7970109301806826, "grad_norm": 0.17000144720077515, "learning_rate": 1.684330913251548e-05, "loss": 0.5, "step": 3573 }, { "epoch": 0.797233995092572, "grad_norm": 0.16179999709129333, "learning_rate": 1.6841592889309405e-05, "loss": 0.4769, "step": 3574 }, { "epoch": 0.7974570600044613, "grad_norm": 0.16142041981220245, "learning_rate": 1.6839876267172883e-05, "loss": 0.4644, "step": 3575 }, { "epoch": 0.7976801249163507, "grad_norm": 0.16446426510810852, "learning_rate": 1.683815926620099e-05, "loss": 0.4835, "step": 3576 }, { "epoch": 0.79790318982824, "grad_norm": 0.16954201459884644, "learning_rate": 1.6836441886488822e-05, "loss": 0.5151, "step": 3577 }, { "epoch": 0.7981262547401293, "grad_norm": 0.16908320784568787, "learning_rate": 1.6834724128131496e-05, "loss": 0.4714, "step": 3578 }, { "epoch": 0.7983493196520187, "grad_norm": 0.1670169085264206, "learning_rate": 1.683300599122416e-05, "loss": 0.5004, "step": 3579 }, { "epoch": 0.7985723845639081, "grad_norm": 0.17246994376182556, "learning_rate": 1.683128747586197e-05, "loss": 0.5066, "step": 3580 }, { "epoch": 0.7987954494757975, "grad_norm": 0.16627082228660583, "learning_rate": 1.6829568582140108e-05, "loss": 0.4813, "step": 3581 }, { "epoch": 0.7990185143876868, "grad_norm": 0.16429170966148376, "learning_rate": 1.6827849310153778e-05, "loss": 0.4916, "step": 3582 }, { "epoch": 0.7992415792995762, "grad_norm": 0.1685648411512375, "learning_rate": 1.6826129659998204e-05, "loss": 0.4821, "step": 3583 }, { "epoch": 0.7994646442114656, "grad_norm": 0.1623144894838333, "learning_rate": 1.6824409631768633e-05, "loss": 0.5092, "step": 3584 }, { "epoch": 0.7996877091233549, "grad_norm": 0.16186010837554932, "learning_rate": 1.6822689225560328e-05, "loss": 0.4647, "step": 3585 }, { "epoch": 0.7999107740352442, "grad_norm": 0.15835130214691162, "learning_rate": 1.682096844146858e-05, "loss": 0.5139, "step": 3586 }, { "epoch": 0.8001338389471336, "grad_norm": 0.1640879362821579, "learning_rate": 1.681924727958869e-05, "loss": 0.4815, "step": 3587 }, { "epoch": 0.800356903859023, "grad_norm": 0.18087539076805115, "learning_rate": 1.681752574001599e-05, "loss": 0.5057, "step": 3588 }, { "epoch": 0.8005799687709123, "grad_norm": 0.16318152844905853, "learning_rate": 1.6815803822845834e-05, "loss": 0.4945, "step": 3589 }, { "epoch": 0.8008030336828017, "grad_norm": 0.16909107565879822, "learning_rate": 1.681408152817359e-05, "loss": 0.5123, "step": 3590 }, { "epoch": 0.8010260985946911, "grad_norm": 0.16913791000843048, "learning_rate": 1.6812358856094652e-05, "loss": 0.4933, "step": 3591 }, { "epoch": 0.8012491635065804, "grad_norm": 0.16670222580432892, "learning_rate": 1.681063580670442e-05, "loss": 0.5109, "step": 3592 }, { "epoch": 0.8014722284184698, "grad_norm": 0.16838552057743073, "learning_rate": 1.680891238009834e-05, "loss": 0.4926, "step": 3593 }, { "epoch": 0.8016952933303592, "grad_norm": 0.15705366432666779, "learning_rate": 1.6807188576371864e-05, "loss": 0.4762, "step": 3594 }, { "epoch": 0.8019183582422484, "grad_norm": 0.15505351126194, "learning_rate": 1.6805464395620465e-05, "loss": 0.4769, "step": 3595 }, { "epoch": 0.8021414231541378, "grad_norm": 0.17056378722190857, "learning_rate": 1.6803739837939642e-05, "loss": 0.5038, "step": 3596 }, { "epoch": 0.8023644880660272, "grad_norm": 0.15908926725387573, "learning_rate": 1.6802014903424905e-05, "loss": 0.5079, "step": 3597 }, { "epoch": 0.8025875529779166, "grad_norm": 0.16009296476840973, "learning_rate": 1.68002895921718e-05, "loss": 0.497, "step": 3598 }, { "epoch": 0.8028106178898059, "grad_norm": 0.17202340066432953, "learning_rate": 1.6798563904275882e-05, "loss": 0.5097, "step": 3599 }, { "epoch": 0.8030336828016953, "grad_norm": 0.16179756820201874, "learning_rate": 1.679683783983273e-05, "loss": 0.4992, "step": 3600 }, { "epoch": 0.8032567477135847, "grad_norm": 0.16165940463542938, "learning_rate": 1.6795111398937944e-05, "loss": 0.4959, "step": 3601 }, { "epoch": 0.803479812625474, "grad_norm": 0.17612482607364655, "learning_rate": 1.679338458168714e-05, "loss": 0.4937, "step": 3602 }, { "epoch": 0.8037028775373634, "grad_norm": 0.16228753328323364, "learning_rate": 1.679165738817597e-05, "loss": 0.5287, "step": 3603 }, { "epoch": 0.8039259424492528, "grad_norm": 0.17843377590179443, "learning_rate": 1.6789929818500096e-05, "loss": 0.4851, "step": 3604 }, { "epoch": 0.8041490073611421, "grad_norm": 0.1637219339609146, "learning_rate": 1.6788201872755196e-05, "loss": 0.4633, "step": 3605 }, { "epoch": 0.8043720722730314, "grad_norm": 0.16961325705051422, "learning_rate": 1.6786473551036978e-05, "loss": 0.5084, "step": 3606 }, { "epoch": 0.8045951371849208, "grad_norm": 0.15785901248455048, "learning_rate": 1.6784744853441167e-05, "loss": 0.4855, "step": 3607 }, { "epoch": 0.8048182020968102, "grad_norm": 0.1613706648349762, "learning_rate": 1.6783015780063503e-05, "loss": 0.4674, "step": 3608 }, { "epoch": 0.8050412670086995, "grad_norm": 0.15770243108272552, "learning_rate": 1.678128633099976e-05, "loss": 0.4802, "step": 3609 }, { "epoch": 0.8052643319205889, "grad_norm": 0.16045016050338745, "learning_rate": 1.677955650634573e-05, "loss": 0.4965, "step": 3610 }, { "epoch": 0.8054873968324783, "grad_norm": 0.15433759987354279, "learning_rate": 1.6777826306197208e-05, "loss": 0.4752, "step": 3611 }, { "epoch": 0.8057104617443676, "grad_norm": 0.16770930588245392, "learning_rate": 1.6776095730650034e-05, "loss": 0.5025, "step": 3612 }, { "epoch": 0.805933526656257, "grad_norm": 0.15930598974227905, "learning_rate": 1.6774364779800057e-05, "loss": 0.4995, "step": 3613 }, { "epoch": 0.8061565915681463, "grad_norm": 0.16089414060115814, "learning_rate": 1.6772633453743142e-05, "loss": 0.4973, "step": 3614 }, { "epoch": 0.8063796564800357, "grad_norm": 0.1723753660917282, "learning_rate": 1.6770901752575186e-05, "loss": 0.5267, "step": 3615 }, { "epoch": 0.806602721391925, "grad_norm": 0.15985791385173798, "learning_rate": 1.6769169676392103e-05, "loss": 0.4873, "step": 3616 }, { "epoch": 0.8068257863038144, "grad_norm": 0.15705527365207672, "learning_rate": 1.676743722528982e-05, "loss": 0.48, "step": 3617 }, { "epoch": 0.8070488512157038, "grad_norm": 0.17080652713775635, "learning_rate": 1.6765704399364297e-05, "loss": 0.507, "step": 3618 }, { "epoch": 0.8072719161275931, "grad_norm": 0.16177797317504883, "learning_rate": 1.6763971198711505e-05, "loss": 0.5168, "step": 3619 }, { "epoch": 0.8074949810394825, "grad_norm": 0.15763692557811737, "learning_rate": 1.6762237623427445e-05, "loss": 0.4736, "step": 3620 }, { "epoch": 0.8077180459513719, "grad_norm": 0.15657658874988556, "learning_rate": 1.6760503673608123e-05, "loss": 0.479, "step": 3621 }, { "epoch": 0.8079411108632613, "grad_norm": 0.15933813154697418, "learning_rate": 1.6758769349349586e-05, "loss": 0.519, "step": 3622 }, { "epoch": 0.8081641757751505, "grad_norm": 0.173330619931221, "learning_rate": 1.675703465074789e-05, "loss": 0.5056, "step": 3623 }, { "epoch": 0.8083872406870399, "grad_norm": 0.16727612912654877, "learning_rate": 1.6755299577899107e-05, "loss": 0.4949, "step": 3624 }, { "epoch": 0.8086103055989293, "grad_norm": 0.23466536402702332, "learning_rate": 1.6753564130899343e-05, "loss": 0.5203, "step": 3625 }, { "epoch": 0.8088333705108186, "grad_norm": 0.16051769256591797, "learning_rate": 1.6751828309844714e-05, "loss": 0.4907, "step": 3626 }, { "epoch": 0.809056435422708, "grad_norm": 0.16254782676696777, "learning_rate": 1.6750092114831368e-05, "loss": 0.4976, "step": 3627 }, { "epoch": 0.8092795003345974, "grad_norm": 0.1722305417060852, "learning_rate": 1.6748355545955456e-05, "loss": 0.4932, "step": 3628 }, { "epoch": 0.8095025652464868, "grad_norm": 0.16842518746852875, "learning_rate": 1.6746618603313165e-05, "loss": 0.4934, "step": 3629 }, { "epoch": 0.8097256301583761, "grad_norm": 0.16397298872470856, "learning_rate": 1.67448812870007e-05, "loss": 0.4824, "step": 3630 }, { "epoch": 0.8099486950702655, "grad_norm": 0.15751919150352478, "learning_rate": 1.674314359711428e-05, "loss": 0.5057, "step": 3631 }, { "epoch": 0.8101717599821548, "grad_norm": 0.15799839794635773, "learning_rate": 1.6741405533750154e-05, "loss": 0.4941, "step": 3632 }, { "epoch": 0.8103948248940441, "grad_norm": 0.16038256883621216, "learning_rate": 1.6739667097004583e-05, "loss": 0.4765, "step": 3633 }, { "epoch": 0.8106178898059335, "grad_norm": 0.18798737227916718, "learning_rate": 1.6737928286973852e-05, "loss": 0.5119, "step": 3634 }, { "epoch": 0.8108409547178229, "grad_norm": 0.1568623036146164, "learning_rate": 1.673618910375427e-05, "loss": 0.4881, "step": 3635 }, { "epoch": 0.8110640196297122, "grad_norm": 0.16038067638874054, "learning_rate": 1.6734449547442165e-05, "loss": 0.491, "step": 3636 }, { "epoch": 0.8112870845416016, "grad_norm": 0.1699703484773636, "learning_rate": 1.6732709618133882e-05, "loss": 0.4772, "step": 3637 }, { "epoch": 0.811510149453491, "grad_norm": 0.16090309619903564, "learning_rate": 1.673096931592579e-05, "loss": 0.4703, "step": 3638 }, { "epoch": 0.8117332143653804, "grad_norm": 0.16035638749599457, "learning_rate": 1.672922864091428e-05, "loss": 0.5076, "step": 3639 }, { "epoch": 0.8119562792772697, "grad_norm": 0.16750063002109528, "learning_rate": 1.6727487593195757e-05, "loss": 0.4961, "step": 3640 }, { "epoch": 0.812179344189159, "grad_norm": 0.15547265112400055, "learning_rate": 1.6725746172866652e-05, "loss": 0.4759, "step": 3641 }, { "epoch": 0.8124024091010484, "grad_norm": 0.15655042231082916, "learning_rate": 1.672400438002342e-05, "loss": 0.511, "step": 3642 }, { "epoch": 0.8126254740129377, "grad_norm": 0.1612492948770523, "learning_rate": 1.6722262214762527e-05, "loss": 0.4901, "step": 3643 }, { "epoch": 0.8128485389248271, "grad_norm": 0.15349607169628143, "learning_rate": 1.6720519677180472e-05, "loss": 0.5094, "step": 3644 }, { "epoch": 0.8130716038367165, "grad_norm": 0.16423405706882477, "learning_rate": 1.671877676737376e-05, "loss": 0.513, "step": 3645 }, { "epoch": 0.8132946687486059, "grad_norm": 0.1583424061536789, "learning_rate": 1.671703348543893e-05, "loss": 0.4795, "step": 3646 }, { "epoch": 0.8135177336604952, "grad_norm": 0.16294583678245544, "learning_rate": 1.671528983147253e-05, "loss": 0.5019, "step": 3647 }, { "epoch": 0.8137407985723846, "grad_norm": 0.1648416668176651, "learning_rate": 1.671354580557114e-05, "loss": 0.4879, "step": 3648 }, { "epoch": 0.813963863484274, "grad_norm": 0.16462133824825287, "learning_rate": 1.6711801407831356e-05, "loss": 0.4911, "step": 3649 }, { "epoch": 0.8141869283961632, "grad_norm": 0.26390138268470764, "learning_rate": 1.671005663834979e-05, "loss": 0.4822, "step": 3650 }, { "epoch": 0.8144099933080526, "grad_norm": 0.17741946876049042, "learning_rate": 1.670831149722308e-05, "loss": 0.5237, "step": 3651 }, { "epoch": 0.814633058219942, "grad_norm": 0.18031084537506104, "learning_rate": 1.670656598454788e-05, "loss": 0.4956, "step": 3652 }, { "epoch": 0.8148561231318313, "grad_norm": 0.1602051854133606, "learning_rate": 1.670482010042087e-05, "loss": 0.4695, "step": 3653 }, { "epoch": 0.8150791880437207, "grad_norm": 0.15887601673603058, "learning_rate": 1.670307384493875e-05, "loss": 0.4907, "step": 3654 }, { "epoch": 0.8153022529556101, "grad_norm": 0.16547198593616486, "learning_rate": 1.6701327218198234e-05, "loss": 0.4944, "step": 3655 }, { "epoch": 0.8155253178674995, "grad_norm": 0.16335895657539368, "learning_rate": 1.6699580220296065e-05, "loss": 0.484, "step": 3656 }, { "epoch": 0.8157483827793888, "grad_norm": 0.1715383529663086, "learning_rate": 1.6697832851329002e-05, "loss": 0.5086, "step": 3657 }, { "epoch": 0.8159714476912782, "grad_norm": 0.16271516680717468, "learning_rate": 1.6696085111393825e-05, "loss": 0.4771, "step": 3658 }, { "epoch": 0.8161945126031676, "grad_norm": 0.16480712592601776, "learning_rate": 1.6694337000587334e-05, "loss": 0.5099, "step": 3659 }, { "epoch": 0.8164175775150568, "grad_norm": 0.15898552536964417, "learning_rate": 1.669258851900635e-05, "loss": 0.4987, "step": 3660 }, { "epoch": 0.8166406424269462, "grad_norm": 0.16049543023109436, "learning_rate": 1.6690839666747717e-05, "loss": 0.4762, "step": 3661 }, { "epoch": 0.8168637073388356, "grad_norm": 0.15958286821842194, "learning_rate": 1.6689090443908296e-05, "loss": 0.4827, "step": 3662 }, { "epoch": 0.817086772250725, "grad_norm": 0.16123978793621063, "learning_rate": 1.668734085058497e-05, "loss": 0.4803, "step": 3663 }, { "epoch": 0.8173098371626143, "grad_norm": 0.1554545909166336, "learning_rate": 1.668559088687464e-05, "loss": 0.4813, "step": 3664 }, { "epoch": 0.8175329020745037, "grad_norm": 0.15960079431533813, "learning_rate": 1.6683840552874235e-05, "loss": 0.4947, "step": 3665 }, { "epoch": 0.8177559669863931, "grad_norm": 0.15964864194393158, "learning_rate": 1.6682089848680698e-05, "loss": 0.4815, "step": 3666 }, { "epoch": 0.8179790318982824, "grad_norm": 0.16264641284942627, "learning_rate": 1.6680338774390993e-05, "loss": 0.509, "step": 3667 }, { "epoch": 0.8182020968101718, "grad_norm": 0.164497509598732, "learning_rate": 1.6678587330102103e-05, "loss": 0.5247, "step": 3668 }, { "epoch": 0.8184251617220611, "grad_norm": 0.16061222553253174, "learning_rate": 1.667683551591104e-05, "loss": 0.5161, "step": 3669 }, { "epoch": 0.8186482266339504, "grad_norm": 0.19114501774311066, "learning_rate": 1.6675083331914823e-05, "loss": 0.5119, "step": 3670 }, { "epoch": 0.8188712915458398, "grad_norm": 0.18399201333522797, "learning_rate": 1.6673330778210508e-05, "loss": 0.4907, "step": 3671 }, { "epoch": 0.8190943564577292, "grad_norm": 0.15932810306549072, "learning_rate": 1.6671577854895153e-05, "loss": 0.5102, "step": 3672 }, { "epoch": 0.8193174213696186, "grad_norm": 0.1565459966659546, "learning_rate": 1.6669824562065856e-05, "loss": 0.4752, "step": 3673 }, { "epoch": 0.8195404862815079, "grad_norm": 0.1646818071603775, "learning_rate": 1.6668070899819714e-05, "loss": 0.5099, "step": 3674 }, { "epoch": 0.8197635511933973, "grad_norm": 0.1591091752052307, "learning_rate": 1.6666316868253867e-05, "loss": 0.4969, "step": 3675 }, { "epoch": 0.8199866161052867, "grad_norm": 0.16319642961025238, "learning_rate": 1.6664562467465455e-05, "loss": 0.5129, "step": 3676 }, { "epoch": 0.820209681017176, "grad_norm": 0.16143767535686493, "learning_rate": 1.6662807697551654e-05, "loss": 0.4952, "step": 3677 }, { "epoch": 0.8204327459290653, "grad_norm": 0.15746799111366272, "learning_rate": 1.666105255860965e-05, "loss": 0.4947, "step": 3678 }, { "epoch": 0.8206558108409547, "grad_norm": 0.16585499048233032, "learning_rate": 1.6659297050736657e-05, "loss": 0.5134, "step": 3679 }, { "epoch": 0.8208788757528441, "grad_norm": 0.16004452109336853, "learning_rate": 1.6657541174029902e-05, "loss": 0.4995, "step": 3680 }, { "epoch": 0.8211019406647334, "grad_norm": 0.1682739406824112, "learning_rate": 1.665578492858664e-05, "loss": 0.4796, "step": 3681 }, { "epoch": 0.8213250055766228, "grad_norm": 0.1618606299161911, "learning_rate": 1.6654028314504147e-05, "loss": 0.4849, "step": 3682 }, { "epoch": 0.8215480704885122, "grad_norm": 0.1632058471441269, "learning_rate": 1.6652271331879706e-05, "loss": 0.499, "step": 3683 }, { "epoch": 0.8217711354004015, "grad_norm": 0.16575373709201813, "learning_rate": 1.665051398081064e-05, "loss": 0.5193, "step": 3684 }, { "epoch": 0.8219942003122909, "grad_norm": 0.16228437423706055, "learning_rate": 1.664875626139427e-05, "loss": 0.5065, "step": 3685 }, { "epoch": 0.8222172652241803, "grad_norm": 0.1525840312242508, "learning_rate": 1.6646998173727955e-05, "loss": 0.4904, "step": 3686 }, { "epoch": 0.8224403301360695, "grad_norm": 0.16036051511764526, "learning_rate": 1.6645239717909074e-05, "loss": 0.4798, "step": 3687 }, { "epoch": 0.8226633950479589, "grad_norm": 0.16049040853977203, "learning_rate": 1.6643480894035015e-05, "loss": 0.4985, "step": 3688 }, { "epoch": 0.8228864599598483, "grad_norm": 0.16121259331703186, "learning_rate": 1.6641721702203196e-05, "loss": 0.4586, "step": 3689 }, { "epoch": 0.8231095248717377, "grad_norm": 0.1610342562198639, "learning_rate": 1.663996214251105e-05, "loss": 0.4781, "step": 3690 }, { "epoch": 0.823332589783627, "grad_norm": 0.17599108815193176, "learning_rate": 1.6638202215056036e-05, "loss": 0.509, "step": 3691 }, { "epoch": 0.8235556546955164, "grad_norm": 0.15026211738586426, "learning_rate": 1.6636441919935627e-05, "loss": 0.4718, "step": 3692 }, { "epoch": 0.8237787196074058, "grad_norm": 0.1567082703113556, "learning_rate": 1.6634681257247314e-05, "loss": 0.5084, "step": 3693 }, { "epoch": 0.8240017845192951, "grad_norm": 0.1553567349910736, "learning_rate": 1.6632920227088628e-05, "loss": 0.481, "step": 3694 }, { "epoch": 0.8242248494311845, "grad_norm": 0.16551898419857025, "learning_rate": 1.663115882955709e-05, "loss": 0.5145, "step": 3695 }, { "epoch": 0.8244479143430739, "grad_norm": 0.1742102950811386, "learning_rate": 1.6629397064750267e-05, "loss": 0.5073, "step": 3696 }, { "epoch": 0.8246709792549632, "grad_norm": 0.16208507120609283, "learning_rate": 1.6627634932765735e-05, "loss": 0.4637, "step": 3697 }, { "epoch": 0.8248940441668525, "grad_norm": 0.1664377748966217, "learning_rate": 1.662587243370109e-05, "loss": 0.4927, "step": 3698 }, { "epoch": 0.8251171090787419, "grad_norm": 0.16273631155490875, "learning_rate": 1.662410956765395e-05, "loss": 0.4861, "step": 3699 }, { "epoch": 0.8253401739906313, "grad_norm": 0.16939525306224823, "learning_rate": 1.6622346334721956e-05, "loss": 0.4973, "step": 3700 }, { "epoch": 0.8255632389025206, "grad_norm": 0.16064801812171936, "learning_rate": 1.6620582735002762e-05, "loss": 0.4815, "step": 3701 }, { "epoch": 0.82578630381441, "grad_norm": 0.17662890255451202, "learning_rate": 1.6618818768594058e-05, "loss": 0.4851, "step": 3702 }, { "epoch": 0.8260093687262994, "grad_norm": 0.16551104187965393, "learning_rate": 1.6617054435593535e-05, "loss": 0.4872, "step": 3703 }, { "epoch": 0.8262324336381888, "grad_norm": 0.16171106696128845, "learning_rate": 1.6615289736098912e-05, "loss": 0.4801, "step": 3704 }, { "epoch": 0.826455498550078, "grad_norm": 0.15718747675418854, "learning_rate": 1.6613524670207933e-05, "loss": 0.4687, "step": 3705 }, { "epoch": 0.8266785634619674, "grad_norm": 0.16042965650558472, "learning_rate": 1.6611759238018356e-05, "loss": 0.5026, "step": 3706 }, { "epoch": 0.8269016283738568, "grad_norm": 0.1648973822593689, "learning_rate": 1.660999343962796e-05, "loss": 0.5002, "step": 3707 }, { "epoch": 0.8271246932857461, "grad_norm": 0.16398411989212036, "learning_rate": 1.6608227275134555e-05, "loss": 0.4601, "step": 3708 }, { "epoch": 0.8273477581976355, "grad_norm": 0.16151902079582214, "learning_rate": 1.6606460744635952e-05, "loss": 0.5171, "step": 3709 }, { "epoch": 0.8275708231095249, "grad_norm": 0.15724126994609833, "learning_rate": 1.660469384823e-05, "loss": 0.4794, "step": 3710 }, { "epoch": 0.8277938880214142, "grad_norm": 0.16321682929992676, "learning_rate": 1.6602926586014555e-05, "loss": 0.4987, "step": 3711 }, { "epoch": 0.8280169529333036, "grad_norm": 0.17783483862876892, "learning_rate": 1.66011589580875e-05, "loss": 0.4795, "step": 3712 }, { "epoch": 0.828240017845193, "grad_norm": 0.16518917679786682, "learning_rate": 1.659939096454674e-05, "loss": 0.5131, "step": 3713 }, { "epoch": 0.8284630827570824, "grad_norm": 0.1636318564414978, "learning_rate": 1.6597622605490198e-05, "loss": 0.4772, "step": 3714 }, { "epoch": 0.8286861476689716, "grad_norm": 0.16061849892139435, "learning_rate": 1.6595853881015814e-05, "loss": 0.5001, "step": 3715 }, { "epoch": 0.828909212580861, "grad_norm": 0.15872059762477875, "learning_rate": 1.6594084791221554e-05, "loss": 0.4987, "step": 3716 }, { "epoch": 0.8291322774927504, "grad_norm": 0.15593315660953522, "learning_rate": 1.65923153362054e-05, "loss": 0.4752, "step": 3717 }, { "epoch": 0.8293553424046397, "grad_norm": 0.15845991671085358, "learning_rate": 1.6590545516065353e-05, "loss": 0.5206, "step": 3718 }, { "epoch": 0.8295784073165291, "grad_norm": 0.15479490160942078, "learning_rate": 1.658877533089944e-05, "loss": 0.4706, "step": 3719 }, { "epoch": 0.8298014722284185, "grad_norm": 0.16462849080562592, "learning_rate": 1.6587004780805704e-05, "loss": 0.4816, "step": 3720 }, { "epoch": 0.8300245371403079, "grad_norm": 0.17479833960533142, "learning_rate": 1.658523386588221e-05, "loss": 0.5083, "step": 3721 }, { "epoch": 0.8302476020521972, "grad_norm": 0.16387352347373962, "learning_rate": 1.658346258622704e-05, "loss": 0.4897, "step": 3722 }, { "epoch": 0.8304706669640866, "grad_norm": 0.16821224987506866, "learning_rate": 1.6581690941938307e-05, "loss": 0.4928, "step": 3723 }, { "epoch": 0.830693731875976, "grad_norm": 0.15833254158496857, "learning_rate": 1.657991893311412e-05, "loss": 0.4523, "step": 3724 }, { "epoch": 0.8309167967878652, "grad_norm": 0.15844541788101196, "learning_rate": 1.657814655985264e-05, "loss": 0.4989, "step": 3725 }, { "epoch": 0.8311398616997546, "grad_norm": 0.16405586898326874, "learning_rate": 1.657637382225202e-05, "loss": 0.512, "step": 3726 }, { "epoch": 0.831362926611644, "grad_norm": 0.16231343150138855, "learning_rate": 1.6574600720410455e-05, "loss": 0.4856, "step": 3727 }, { "epoch": 0.8315859915235333, "grad_norm": 0.16474612057209015, "learning_rate": 1.6572827254426145e-05, "loss": 0.495, "step": 3728 }, { "epoch": 0.8318090564354227, "grad_norm": 0.16462446749210358, "learning_rate": 1.6571053424397316e-05, "loss": 0.4931, "step": 3729 }, { "epoch": 0.8320321213473121, "grad_norm": 0.1530153751373291, "learning_rate": 1.6569279230422215e-05, "loss": 0.4752, "step": 3730 }, { "epoch": 0.8322551862592015, "grad_norm": 0.15689024329185486, "learning_rate": 1.656750467259911e-05, "loss": 0.4899, "step": 3731 }, { "epoch": 0.8324782511710908, "grad_norm": 0.17177921533584595, "learning_rate": 1.656572975102628e-05, "loss": 0.4941, "step": 3732 }, { "epoch": 0.8327013160829801, "grad_norm": 0.15630176663398743, "learning_rate": 1.6563954465802042e-05, "loss": 0.4768, "step": 3733 }, { "epoch": 0.8329243809948695, "grad_norm": 0.17884239554405212, "learning_rate": 1.6562178817024713e-05, "loss": 0.5076, "step": 3734 }, { "epoch": 0.8331474459067588, "grad_norm": 0.15849219262599945, "learning_rate": 1.6560402804792644e-05, "loss": 0.4948, "step": 3735 }, { "epoch": 0.8333705108186482, "grad_norm": 0.15310537815093994, "learning_rate": 1.65586264292042e-05, "loss": 0.467, "step": 3736 }, { "epoch": 0.8335935757305376, "grad_norm": 0.1629456728696823, "learning_rate": 1.6556849690357776e-05, "loss": 0.4794, "step": 3737 }, { "epoch": 0.833816640642427, "grad_norm": 0.16876748204231262, "learning_rate": 1.6555072588351765e-05, "loss": 0.497, "step": 3738 }, { "epoch": 0.8340397055543163, "grad_norm": 0.17010155320167542, "learning_rate": 1.6553295123284605e-05, "loss": 0.5199, "step": 3739 }, { "epoch": 0.8342627704662057, "grad_norm": 0.16115908324718475, "learning_rate": 1.6551517295254732e-05, "loss": 0.5094, "step": 3740 }, { "epoch": 0.8344858353780951, "grad_norm": 0.18651996552944183, "learning_rate": 1.6549739104360627e-05, "loss": 0.4963, "step": 3741 }, { "epoch": 0.8347089002899843, "grad_norm": 0.16856126487255096, "learning_rate": 1.6547960550700766e-05, "loss": 0.4729, "step": 3742 }, { "epoch": 0.8349319652018737, "grad_norm": 0.16244658827781677, "learning_rate": 1.6546181634373666e-05, "loss": 0.4964, "step": 3743 }, { "epoch": 0.8351550301137631, "grad_norm": 0.15980856120586395, "learning_rate": 1.654440235547785e-05, "loss": 0.4741, "step": 3744 }, { "epoch": 0.8353780950256524, "grad_norm": 0.15810216963291168, "learning_rate": 1.6542622714111865e-05, "loss": 0.4905, "step": 3745 }, { "epoch": 0.8356011599375418, "grad_norm": 0.15649911761283875, "learning_rate": 1.654084271037428e-05, "loss": 0.4883, "step": 3746 }, { "epoch": 0.8358242248494312, "grad_norm": 0.16376326978206635, "learning_rate": 1.653906234436368e-05, "loss": 0.4788, "step": 3747 }, { "epoch": 0.8360472897613206, "grad_norm": 0.17131556570529938, "learning_rate": 1.6537281616178674e-05, "loss": 0.5121, "step": 3748 }, { "epoch": 0.8362703546732099, "grad_norm": 0.1694851517677307, "learning_rate": 1.6535500525917893e-05, "loss": 0.5165, "step": 3749 }, { "epoch": 0.8364934195850993, "grad_norm": 0.16306069493293762, "learning_rate": 1.653371907367998e-05, "loss": 0.493, "step": 3750 }, { "epoch": 0.8367164844969887, "grad_norm": 0.1630067676305771, "learning_rate": 1.6531937259563612e-05, "loss": 0.4992, "step": 3751 }, { "epoch": 0.8369395494088779, "grad_norm": 0.15753702819347382, "learning_rate": 1.6530155083667468e-05, "loss": 0.4767, "step": 3752 }, { "epoch": 0.8371626143207673, "grad_norm": 0.1651187390089035, "learning_rate": 1.6528372546090258e-05, "loss": 0.4987, "step": 3753 }, { "epoch": 0.8373856792326567, "grad_norm": 0.1615830808877945, "learning_rate": 1.6526589646930712e-05, "loss": 0.4932, "step": 3754 }, { "epoch": 0.8376087441445461, "grad_norm": 0.1610192209482193, "learning_rate": 1.6524806386287578e-05, "loss": 0.5062, "step": 3755 }, { "epoch": 0.8378318090564354, "grad_norm": 0.16058041155338287, "learning_rate": 1.652302276425962e-05, "loss": 0.4937, "step": 3756 }, { "epoch": 0.8380548739683248, "grad_norm": 0.15800607204437256, "learning_rate": 1.6521238780945635e-05, "loss": 0.4814, "step": 3757 }, { "epoch": 0.8382779388802142, "grad_norm": 0.16751538217067719, "learning_rate": 1.6519454436444423e-05, "loss": 0.4904, "step": 3758 }, { "epoch": 0.8385010037921035, "grad_norm": 0.16399426758289337, "learning_rate": 1.651766973085482e-05, "loss": 0.5086, "step": 3759 }, { "epoch": 0.8387240687039929, "grad_norm": 0.16619375348091125, "learning_rate": 1.6515884664275663e-05, "loss": 0.5192, "step": 3760 }, { "epoch": 0.8389471336158822, "grad_norm": 0.1645645648241043, "learning_rate": 1.651409923680583e-05, "loss": 0.4894, "step": 3761 }, { "epoch": 0.8391701985277715, "grad_norm": 0.16611702740192413, "learning_rate": 1.6512313448544207e-05, "loss": 0.4858, "step": 3762 }, { "epoch": 0.8393932634396609, "grad_norm": 0.15403629839420319, "learning_rate": 1.6510527299589696e-05, "loss": 0.4844, "step": 3763 }, { "epoch": 0.8396163283515503, "grad_norm": 0.15380257368087769, "learning_rate": 1.6508740790041236e-05, "loss": 0.491, "step": 3764 }, { "epoch": 0.8398393932634397, "grad_norm": 0.1628074049949646, "learning_rate": 1.650695391999777e-05, "loss": 0.4666, "step": 3765 }, { "epoch": 0.840062458175329, "grad_norm": 0.16377408802509308, "learning_rate": 1.650516668955826e-05, "loss": 0.4695, "step": 3766 }, { "epoch": 0.8402855230872184, "grad_norm": 0.16199228167533875, "learning_rate": 1.6503379098821705e-05, "loss": 0.48, "step": 3767 }, { "epoch": 0.8405085879991078, "grad_norm": 0.17420238256454468, "learning_rate": 1.6501591147887108e-05, "loss": 0.4983, "step": 3768 }, { "epoch": 0.840731652910997, "grad_norm": 0.1635866016149521, "learning_rate": 1.649980283685349e-05, "loss": 0.4877, "step": 3769 }, { "epoch": 0.8409547178228864, "grad_norm": 0.16857163608074188, "learning_rate": 1.6498014165819908e-05, "loss": 0.4956, "step": 3770 }, { "epoch": 0.8411777827347758, "grad_norm": 0.17237527668476105, "learning_rate": 1.649622513488543e-05, "loss": 0.515, "step": 3771 }, { "epoch": 0.8414008476466652, "grad_norm": 0.1601458191871643, "learning_rate": 1.6494435744149142e-05, "loss": 0.4839, "step": 3772 }, { "epoch": 0.8416239125585545, "grad_norm": 0.1580028235912323, "learning_rate": 1.6492645993710148e-05, "loss": 0.4529, "step": 3773 }, { "epoch": 0.8418469774704439, "grad_norm": 0.16191108524799347, "learning_rate": 1.649085588366758e-05, "loss": 0.495, "step": 3774 }, { "epoch": 0.8420700423823333, "grad_norm": 0.16524077951908112, "learning_rate": 1.6489065414120583e-05, "loss": 0.4832, "step": 3775 }, { "epoch": 0.8422931072942226, "grad_norm": 0.16424553096294403, "learning_rate": 1.6487274585168327e-05, "loss": 0.4811, "step": 3776 }, { "epoch": 0.842516172206112, "grad_norm": 0.16102652251720428, "learning_rate": 1.6485483396909997e-05, "loss": 0.486, "step": 3777 }, { "epoch": 0.8427392371180014, "grad_norm": 0.16395264863967896, "learning_rate": 1.64836918494448e-05, "loss": 0.4956, "step": 3778 }, { "epoch": 0.8429623020298908, "grad_norm": 0.17286796867847443, "learning_rate": 1.6481899942871967e-05, "loss": 0.4859, "step": 3779 }, { "epoch": 0.84318536694178, "grad_norm": 0.16825571656227112, "learning_rate": 1.648010767729074e-05, "loss": 0.4873, "step": 3780 }, { "epoch": 0.8434084318536694, "grad_norm": 0.14970429241657257, "learning_rate": 1.647831505280039e-05, "loss": 0.4725, "step": 3781 }, { "epoch": 0.8436314967655588, "grad_norm": 0.1689714640378952, "learning_rate": 1.64765220695002e-05, "loss": 0.4865, "step": 3782 }, { "epoch": 0.8438545616774481, "grad_norm": 0.18679800629615784, "learning_rate": 1.647472872748948e-05, "loss": 0.5144, "step": 3783 }, { "epoch": 0.8440776265893375, "grad_norm": 0.15957824885845184, "learning_rate": 1.6472935026867555e-05, "loss": 0.4969, "step": 3784 }, { "epoch": 0.8443006915012269, "grad_norm": 0.1743471622467041, "learning_rate": 1.6471140967733772e-05, "loss": 0.4945, "step": 3785 }, { "epoch": 0.8445237564131162, "grad_norm": 0.17243602871894836, "learning_rate": 1.64693465501875e-05, "loss": 0.4963, "step": 3786 }, { "epoch": 0.8447468213250056, "grad_norm": 0.1854105442762375, "learning_rate": 1.646755177432812e-05, "loss": 0.4655, "step": 3787 }, { "epoch": 0.844969886236895, "grad_norm": 0.1698131114244461, "learning_rate": 1.6465756640255038e-05, "loss": 0.5208, "step": 3788 }, { "epoch": 0.8451929511487843, "grad_norm": 0.17987042665481567, "learning_rate": 1.6463961148067685e-05, "loss": 0.5031, "step": 3789 }, { "epoch": 0.8454160160606736, "grad_norm": 0.15817251801490784, "learning_rate": 1.6462165297865503e-05, "loss": 0.4892, "step": 3790 }, { "epoch": 0.845639080972563, "grad_norm": 0.16425161063671112, "learning_rate": 1.6460369089747956e-05, "loss": 0.4867, "step": 3791 }, { "epoch": 0.8458621458844524, "grad_norm": 0.164134219288826, "learning_rate": 1.6458572523814535e-05, "loss": 0.5063, "step": 3792 }, { "epoch": 0.8460852107963417, "grad_norm": 0.1651495099067688, "learning_rate": 1.6456775600164737e-05, "loss": 0.5168, "step": 3793 }, { "epoch": 0.8463082757082311, "grad_norm": 0.1596038043498993, "learning_rate": 1.6454978318898093e-05, "loss": 0.4948, "step": 3794 }, { "epoch": 0.8465313406201205, "grad_norm": 0.16954916715621948, "learning_rate": 1.645318068011415e-05, "loss": 0.5364, "step": 3795 }, { "epoch": 0.8467544055320099, "grad_norm": 0.16513592004776, "learning_rate": 1.6451382683912468e-05, "loss": 0.4905, "step": 3796 }, { "epoch": 0.8469774704438991, "grad_norm": 0.17096330225467682, "learning_rate": 1.6449584330392627e-05, "loss": 0.4687, "step": 3797 }, { "epoch": 0.8472005353557885, "grad_norm": 0.16846898198127747, "learning_rate": 1.644778561965424e-05, "loss": 0.4803, "step": 3798 }, { "epoch": 0.8474236002676779, "grad_norm": 0.16746586561203003, "learning_rate": 1.644598655179693e-05, "loss": 0.5015, "step": 3799 }, { "epoch": 0.8476466651795672, "grad_norm": 0.16072294116020203, "learning_rate": 1.6444187126920334e-05, "loss": 0.4993, "step": 3800 }, { "epoch": 0.8478697300914566, "grad_norm": 0.18298223614692688, "learning_rate": 1.644238734512412e-05, "loss": 0.4923, "step": 3801 }, { "epoch": 0.848092795003346, "grad_norm": 0.1531648337841034, "learning_rate": 1.6440587206507972e-05, "loss": 0.4731, "step": 3802 }, { "epoch": 0.8483158599152353, "grad_norm": 0.16385571658611298, "learning_rate": 1.6438786711171588e-05, "loss": 0.4718, "step": 3803 }, { "epoch": 0.8485389248271247, "grad_norm": 0.15524934232234955, "learning_rate": 1.6436985859214698e-05, "loss": 0.4886, "step": 3804 }, { "epoch": 0.8487619897390141, "grad_norm": 0.16199147701263428, "learning_rate": 1.643518465073704e-05, "loss": 0.5056, "step": 3805 }, { "epoch": 0.8489850546509035, "grad_norm": 0.17070268094539642, "learning_rate": 1.6433383085838378e-05, "loss": 0.5157, "step": 3806 }, { "epoch": 0.8492081195627927, "grad_norm": 0.19035401940345764, "learning_rate": 1.643158116461849e-05, "loss": 0.5135, "step": 3807 }, { "epoch": 0.8494311844746821, "grad_norm": 0.16606462001800537, "learning_rate": 1.6429778887177182e-05, "loss": 0.5058, "step": 3808 }, { "epoch": 0.8496542493865715, "grad_norm": 0.16338834166526794, "learning_rate": 1.6427976253614275e-05, "loss": 0.4914, "step": 3809 }, { "epoch": 0.8498773142984608, "grad_norm": 0.16145442426204681, "learning_rate": 1.6426173264029614e-05, "loss": 0.4999, "step": 3810 }, { "epoch": 0.8501003792103502, "grad_norm": 0.16404959559440613, "learning_rate": 1.642436991852305e-05, "loss": 0.5014, "step": 3811 }, { "epoch": 0.8503234441222396, "grad_norm": 0.16108281910419464, "learning_rate": 1.642256621719447e-05, "loss": 0.4702, "step": 3812 }, { "epoch": 0.850546509034129, "grad_norm": 0.15342526137828827, "learning_rate": 1.642076216014377e-05, "loss": 0.4686, "step": 3813 }, { "epoch": 0.8507695739460183, "grad_norm": 0.16050684452056885, "learning_rate": 1.6418957747470877e-05, "loss": 0.4729, "step": 3814 }, { "epoch": 0.8509926388579077, "grad_norm": 0.17533640563488007, "learning_rate": 1.641715297927573e-05, "loss": 0.4926, "step": 3815 }, { "epoch": 0.851215703769797, "grad_norm": 0.16416087746620178, "learning_rate": 1.641534785565828e-05, "loss": 0.513, "step": 3816 }, { "epoch": 0.8514387686816863, "grad_norm": 0.1623440384864807, "learning_rate": 1.6413542376718513e-05, "loss": 0.49, "step": 3817 }, { "epoch": 0.8516618335935757, "grad_norm": 0.1671840250492096, "learning_rate": 1.641173654255643e-05, "loss": 0.5044, "step": 3818 }, { "epoch": 0.8518848985054651, "grad_norm": 0.1557224541902542, "learning_rate": 1.640993035327204e-05, "loss": 0.4984, "step": 3819 }, { "epoch": 0.8521079634173544, "grad_norm": 0.15791340172290802, "learning_rate": 1.6408123808965392e-05, "loss": 0.4801, "step": 3820 }, { "epoch": 0.8523310283292438, "grad_norm": 0.1637008786201477, "learning_rate": 1.6406316909736536e-05, "loss": 0.4989, "step": 3821 }, { "epoch": 0.8525540932411332, "grad_norm": 0.16228072345256805, "learning_rate": 1.6404509655685555e-05, "loss": 0.4568, "step": 3822 }, { "epoch": 0.8527771581530226, "grad_norm": 0.16301074624061584, "learning_rate": 1.640270204691254e-05, "loss": 0.4741, "step": 3823 }, { "epoch": 0.8530002230649119, "grad_norm": 0.15945035219192505, "learning_rate": 1.6400894083517612e-05, "loss": 0.4885, "step": 3824 }, { "epoch": 0.8532232879768012, "grad_norm": 0.16819296777248383, "learning_rate": 1.639908576560091e-05, "loss": 0.5147, "step": 3825 }, { "epoch": 0.8534463528886906, "grad_norm": 0.16270814836025238, "learning_rate": 1.6397277093262583e-05, "loss": 0.5147, "step": 3826 }, { "epoch": 0.8536694178005799, "grad_norm": 0.16252903640270233, "learning_rate": 1.6395468066602812e-05, "loss": 0.5019, "step": 3827 }, { "epoch": 0.8538924827124693, "grad_norm": 0.16933606564998627, "learning_rate": 1.6393658685721787e-05, "loss": 0.5085, "step": 3828 }, { "epoch": 0.8541155476243587, "grad_norm": 0.17316611111164093, "learning_rate": 1.639184895071973e-05, "loss": 0.5141, "step": 3829 }, { "epoch": 0.8543386125362481, "grad_norm": 0.17270614206790924, "learning_rate": 1.6390038861696868e-05, "loss": 0.4894, "step": 3830 }, { "epoch": 0.8545616774481374, "grad_norm": 0.1740892380475998, "learning_rate": 1.638822841875346e-05, "loss": 0.5061, "step": 3831 }, { "epoch": 0.8547847423600268, "grad_norm": 0.18361349403858185, "learning_rate": 1.638641762198978e-05, "loss": 0.4868, "step": 3832 }, { "epoch": 0.8550078072719162, "grad_norm": 0.18127956986427307, "learning_rate": 1.638460647150612e-05, "loss": 0.5208, "step": 3833 }, { "epoch": 0.8552308721838054, "grad_norm": 0.1610213816165924, "learning_rate": 1.6382794967402792e-05, "loss": 0.4749, "step": 3834 }, { "epoch": 0.8554539370956948, "grad_norm": 0.16226479411125183, "learning_rate": 1.638098310978013e-05, "loss": 0.4872, "step": 3835 }, { "epoch": 0.8556770020075842, "grad_norm": 0.15979908406734467, "learning_rate": 1.6379170898738483e-05, "loss": 0.5036, "step": 3836 }, { "epoch": 0.8559000669194735, "grad_norm": 0.1549946367740631, "learning_rate": 1.6377358334378228e-05, "loss": 0.4858, "step": 3837 }, { "epoch": 0.8561231318313629, "grad_norm": 0.16230742633342743, "learning_rate": 1.6375545416799756e-05, "loss": 0.5084, "step": 3838 }, { "epoch": 0.8563461967432523, "grad_norm": 0.15997640788555145, "learning_rate": 1.6373732146103466e-05, "loss": 0.5162, "step": 3839 }, { "epoch": 0.8565692616551417, "grad_norm": 0.1616683304309845, "learning_rate": 1.6371918522389804e-05, "loss": 0.471, "step": 3840 }, { "epoch": 0.856792326567031, "grad_norm": 0.15914194285869598, "learning_rate": 1.637010454575921e-05, "loss": 0.5039, "step": 3841 }, { "epoch": 0.8570153914789204, "grad_norm": 0.16176219284534454, "learning_rate": 1.636829021631216e-05, "loss": 0.4903, "step": 3842 }, { "epoch": 0.8572384563908098, "grad_norm": 0.1591506451368332, "learning_rate": 1.636647553414914e-05, "loss": 0.4833, "step": 3843 }, { "epoch": 0.857461521302699, "grad_norm": 0.16955386102199554, "learning_rate": 1.6364660499370656e-05, "loss": 0.5088, "step": 3844 }, { "epoch": 0.8576845862145884, "grad_norm": 0.15826305747032166, "learning_rate": 1.636284511207724e-05, "loss": 0.4985, "step": 3845 }, { "epoch": 0.8579076511264778, "grad_norm": 0.16054044663906097, "learning_rate": 1.6361029372369433e-05, "loss": 0.4991, "step": 3846 }, { "epoch": 0.8581307160383672, "grad_norm": 0.16432225704193115, "learning_rate": 1.6359213280347814e-05, "loss": 0.4927, "step": 3847 }, { "epoch": 0.8583537809502565, "grad_norm": 0.17022813856601715, "learning_rate": 1.6357396836112957e-05, "loss": 0.4863, "step": 3848 }, { "epoch": 0.8585768458621459, "grad_norm": 0.15264475345611572, "learning_rate": 1.6355580039765478e-05, "loss": 0.4891, "step": 3849 }, { "epoch": 0.8587999107740353, "grad_norm": 0.1790938675403595, "learning_rate": 1.6353762891405993e-05, "loss": 0.517, "step": 3850 }, { "epoch": 0.8590229756859246, "grad_norm": 0.15989623963832855, "learning_rate": 1.6351945391135154e-05, "loss": 0.4684, "step": 3851 }, { "epoch": 0.859246040597814, "grad_norm": 0.16801711916923523, "learning_rate": 1.6350127539053626e-05, "loss": 0.4782, "step": 3852 }, { "epoch": 0.8594691055097033, "grad_norm": 0.15932013094425201, "learning_rate": 1.634830933526209e-05, "loss": 0.4805, "step": 3853 }, { "epoch": 0.8596921704215927, "grad_norm": 0.16254423558712006, "learning_rate": 1.6346490779861252e-05, "loss": 0.5063, "step": 3854 }, { "epoch": 0.859915235333482, "grad_norm": 0.1786786913871765, "learning_rate": 1.634467187295183e-05, "loss": 0.5457, "step": 3855 }, { "epoch": 0.8601383002453714, "grad_norm": 0.15343046188354492, "learning_rate": 1.6342852614634575e-05, "loss": 0.4681, "step": 3856 }, { "epoch": 0.8603613651572608, "grad_norm": 0.16010713577270508, "learning_rate": 1.634103300501024e-05, "loss": 0.5131, "step": 3857 }, { "epoch": 0.8605844300691501, "grad_norm": 0.1694883406162262, "learning_rate": 1.6339213044179612e-05, "loss": 0.5117, "step": 3858 }, { "epoch": 0.8608074949810395, "grad_norm": 0.1592147946357727, "learning_rate": 1.6337392732243488e-05, "loss": 0.5026, "step": 3859 }, { "epoch": 0.8610305598929289, "grad_norm": 0.15784434974193573, "learning_rate": 1.6335572069302694e-05, "loss": 0.4705, "step": 3860 }, { "epoch": 0.8612536248048182, "grad_norm": 0.15071116387844086, "learning_rate": 1.6333751055458065e-05, "loss": 0.4781, "step": 3861 }, { "epoch": 0.8614766897167075, "grad_norm": 0.16125838458538055, "learning_rate": 1.6331929690810464e-05, "loss": 0.5079, "step": 3862 }, { "epoch": 0.8616997546285969, "grad_norm": 0.16050268709659576, "learning_rate": 1.6330107975460764e-05, "loss": 0.4983, "step": 3863 }, { "epoch": 0.8619228195404863, "grad_norm": 0.15100105106830597, "learning_rate": 1.632828590950987e-05, "loss": 0.46, "step": 3864 }, { "epoch": 0.8621458844523756, "grad_norm": 0.17107254266738892, "learning_rate": 1.632646349305869e-05, "loss": 0.4708, "step": 3865 }, { "epoch": 0.862368949364265, "grad_norm": 0.15665309131145477, "learning_rate": 1.6324640726208172e-05, "loss": 0.4786, "step": 3866 }, { "epoch": 0.8625920142761544, "grad_norm": 0.1617649793624878, "learning_rate": 1.6322817609059267e-05, "loss": 0.5078, "step": 3867 }, { "epoch": 0.8628150791880437, "grad_norm": 0.15603747963905334, "learning_rate": 1.6320994141712948e-05, "loss": 0.4939, "step": 3868 }, { "epoch": 0.8630381440999331, "grad_norm": 0.14941559731960297, "learning_rate": 1.6319170324270212e-05, "loss": 0.4637, "step": 3869 }, { "epoch": 0.8632612090118225, "grad_norm": 0.1626354604959488, "learning_rate": 1.631734615683208e-05, "loss": 0.4715, "step": 3870 }, { "epoch": 0.8634842739237119, "grad_norm": 0.15691153705120087, "learning_rate": 1.6315521639499573e-05, "loss": 0.5017, "step": 3871 }, { "epoch": 0.8637073388356011, "grad_norm": 0.19187089800834656, "learning_rate": 1.6313696772373754e-05, "loss": 0.5369, "step": 3872 }, { "epoch": 0.8639304037474905, "grad_norm": 0.16823086142539978, "learning_rate": 1.6311871555555696e-05, "loss": 0.4892, "step": 3873 }, { "epoch": 0.8641534686593799, "grad_norm": 0.17205961048603058, "learning_rate": 1.6310045989146486e-05, "loss": 0.5472, "step": 3874 }, { "epoch": 0.8643765335712692, "grad_norm": 0.1610354632139206, "learning_rate": 1.6308220073247237e-05, "loss": 0.4887, "step": 3875 }, { "epoch": 0.8645995984831586, "grad_norm": 0.21832773089408875, "learning_rate": 1.6306393807959078e-05, "loss": 0.5081, "step": 3876 }, { "epoch": 0.864822663395048, "grad_norm": 0.17104685306549072, "learning_rate": 1.6304567193383164e-05, "loss": 0.5179, "step": 3877 }, { "epoch": 0.8650457283069373, "grad_norm": 0.15571469068527222, "learning_rate": 1.6302740229620662e-05, "loss": 0.4773, "step": 3878 }, { "epoch": 0.8652687932188267, "grad_norm": 0.16199520230293274, "learning_rate": 1.630091291677276e-05, "loss": 0.4972, "step": 3879 }, { "epoch": 0.865491858130716, "grad_norm": 0.17316444218158722, "learning_rate": 1.6299085254940664e-05, "loss": 0.4961, "step": 3880 }, { "epoch": 0.8657149230426054, "grad_norm": 0.16597145795822144, "learning_rate": 1.6297257244225602e-05, "loss": 0.4893, "step": 3881 }, { "epoch": 0.8659379879544947, "grad_norm": 0.1571418046951294, "learning_rate": 1.6295428884728827e-05, "loss": 0.4957, "step": 3882 }, { "epoch": 0.8661610528663841, "grad_norm": 0.15437713265419006, "learning_rate": 1.62936001765516e-05, "loss": 0.4912, "step": 3883 }, { "epoch": 0.8663841177782735, "grad_norm": 0.15803179144859314, "learning_rate": 1.6291771119795202e-05, "loss": 0.5012, "step": 3884 }, { "epoch": 0.8666071826901628, "grad_norm": 0.1526332050561905, "learning_rate": 1.628994171456095e-05, "loss": 0.48, "step": 3885 }, { "epoch": 0.8668302476020522, "grad_norm": 0.16080142557621002, "learning_rate": 1.628811196095016e-05, "loss": 0.4885, "step": 3886 }, { "epoch": 0.8670533125139416, "grad_norm": 0.16212552785873413, "learning_rate": 1.628628185906417e-05, "loss": 0.5049, "step": 3887 }, { "epoch": 0.867276377425831, "grad_norm": 0.1618081033229828, "learning_rate": 1.6284451409004352e-05, "loss": 0.4955, "step": 3888 }, { "epoch": 0.8674994423377203, "grad_norm": 0.15760111808776855, "learning_rate": 1.628262061087208e-05, "loss": 0.4808, "step": 3889 }, { "epoch": 0.8677225072496096, "grad_norm": 0.15690281987190247, "learning_rate": 1.6280789464768765e-05, "loss": 0.5198, "step": 3890 }, { "epoch": 0.867945572161499, "grad_norm": 0.17230992019176483, "learning_rate": 1.6278957970795818e-05, "loss": 0.4762, "step": 3891 }, { "epoch": 0.8681686370733883, "grad_norm": 0.15698418021202087, "learning_rate": 1.6277126129054687e-05, "loss": 0.4969, "step": 3892 }, { "epoch": 0.8683917019852777, "grad_norm": 0.15693382918834686, "learning_rate": 1.6275293939646822e-05, "loss": 0.4856, "step": 3893 }, { "epoch": 0.8686147668971671, "grad_norm": 0.1605808138847351, "learning_rate": 1.6273461402673706e-05, "loss": 0.4739, "step": 3894 }, { "epoch": 0.8688378318090564, "grad_norm": 0.2007637321949005, "learning_rate": 1.6271628518236836e-05, "loss": 0.5115, "step": 3895 }, { "epoch": 0.8690608967209458, "grad_norm": 0.17741620540618896, "learning_rate": 1.6269795286437728e-05, "loss": 0.5034, "step": 3896 }, { "epoch": 0.8692839616328352, "grad_norm": 0.1685972362756729, "learning_rate": 1.6267961707377923e-05, "loss": 0.5316, "step": 3897 }, { "epoch": 0.8695070265447246, "grad_norm": 0.16251643002033234, "learning_rate": 1.6266127781158965e-05, "loss": 0.5018, "step": 3898 }, { "epoch": 0.8697300914566138, "grad_norm": 0.15942230820655823, "learning_rate": 1.626429350788244e-05, "loss": 0.4799, "step": 3899 }, { "epoch": 0.8699531563685032, "grad_norm": 0.16205990314483643, "learning_rate": 1.6262458887649933e-05, "loss": 0.4721, "step": 3900 }, { "epoch": 0.8701762212803926, "grad_norm": 0.16467221081256866, "learning_rate": 1.6260623920563062e-05, "loss": 0.5137, "step": 3901 }, { "epoch": 0.8703992861922819, "grad_norm": 0.16359251737594604, "learning_rate": 1.6258788606723457e-05, "loss": 0.5063, "step": 3902 }, { "epoch": 0.8706223511041713, "grad_norm": 0.16454778611660004, "learning_rate": 1.625695294623277e-05, "loss": 0.4847, "step": 3903 }, { "epoch": 0.8708454160160607, "grad_norm": 0.1625732183456421, "learning_rate": 1.625511693919267e-05, "loss": 0.5324, "step": 3904 }, { "epoch": 0.8710684809279501, "grad_norm": 0.16343548893928528, "learning_rate": 1.625328058570485e-05, "loss": 0.5012, "step": 3905 }, { "epoch": 0.8712915458398394, "grad_norm": 0.15868264436721802, "learning_rate": 1.6251443885871013e-05, "loss": 0.4921, "step": 3906 }, { "epoch": 0.8715146107517288, "grad_norm": 0.1692667156457901, "learning_rate": 1.6249606839792897e-05, "loss": 0.4861, "step": 3907 }, { "epoch": 0.8717376756636182, "grad_norm": 0.15825539827346802, "learning_rate": 1.6247769447572235e-05, "loss": 0.4828, "step": 3908 }, { "epoch": 0.8719607405755074, "grad_norm": 0.1585260033607483, "learning_rate": 1.6245931709310806e-05, "loss": 0.4885, "step": 3909 }, { "epoch": 0.8721838054873968, "grad_norm": 0.1593310832977295, "learning_rate": 1.624409362511039e-05, "loss": 0.5158, "step": 3910 }, { "epoch": 0.8724068703992862, "grad_norm": 0.15775549411773682, "learning_rate": 1.624225519507279e-05, "loss": 0.4812, "step": 3911 }, { "epoch": 0.8726299353111755, "grad_norm": 0.16018901765346527, "learning_rate": 1.624041641929983e-05, "loss": 0.5123, "step": 3912 }, { "epoch": 0.8728530002230649, "grad_norm": 0.16512146592140198, "learning_rate": 1.6238577297893357e-05, "loss": 0.4886, "step": 3913 }, { "epoch": 0.8730760651349543, "grad_norm": 0.16052314639091492, "learning_rate": 1.6236737830955233e-05, "loss": 0.5078, "step": 3914 }, { "epoch": 0.8732991300468437, "grad_norm": 0.16342321038246155, "learning_rate": 1.6234898018587336e-05, "loss": 0.5151, "step": 3915 }, { "epoch": 0.873522194958733, "grad_norm": 0.16125398874282837, "learning_rate": 1.6233057860891566e-05, "loss": 0.4786, "step": 3916 }, { "epoch": 0.8737452598706223, "grad_norm": 0.17200647294521332, "learning_rate": 1.623121735796985e-05, "loss": 0.5013, "step": 3917 }, { "epoch": 0.8739683247825117, "grad_norm": 0.17798694968223572, "learning_rate": 1.6229376509924116e-05, "loss": 0.5257, "step": 3918 }, { "epoch": 0.874191389694401, "grad_norm": 0.15707524120807648, "learning_rate": 1.6227535316856326e-05, "loss": 0.4889, "step": 3919 }, { "epoch": 0.8744144546062904, "grad_norm": 0.16776537895202637, "learning_rate": 1.622569377886846e-05, "loss": 0.5196, "step": 3920 }, { "epoch": 0.8746375195181798, "grad_norm": 0.16779324412345886, "learning_rate": 1.622385189606251e-05, "loss": 0.512, "step": 3921 }, { "epoch": 0.8748605844300692, "grad_norm": 0.16673165559768677, "learning_rate": 1.622200966854049e-05, "loss": 0.4888, "step": 3922 }, { "epoch": 0.8750836493419585, "grad_norm": 0.18797388672828674, "learning_rate": 1.622016709640444e-05, "loss": 0.4789, "step": 3923 }, { "epoch": 0.8753067142538479, "grad_norm": 0.21027544140815735, "learning_rate": 1.621832417975641e-05, "loss": 0.4818, "step": 3924 }, { "epoch": 0.8755297791657373, "grad_norm": 0.16859135031700134, "learning_rate": 1.621648091869847e-05, "loss": 0.4949, "step": 3925 }, { "epoch": 0.8757528440776265, "grad_norm": 0.1632431447505951, "learning_rate": 1.6214637313332714e-05, "loss": 0.4869, "step": 3926 }, { "epoch": 0.8759759089895159, "grad_norm": 0.1545240879058838, "learning_rate": 1.6212793363761253e-05, "loss": 0.495, "step": 3927 }, { "epoch": 0.8761989739014053, "grad_norm": 0.162541463971138, "learning_rate": 1.621094907008621e-05, "loss": 0.507, "step": 3928 }, { "epoch": 0.8764220388132947, "grad_norm": 0.16767984628677368, "learning_rate": 1.6209104432409745e-05, "loss": 0.5098, "step": 3929 }, { "epoch": 0.876645103725184, "grad_norm": 0.17964600026607513, "learning_rate": 1.6207259450834022e-05, "loss": 0.5055, "step": 3930 }, { "epoch": 0.8768681686370734, "grad_norm": 0.16593998670578003, "learning_rate": 1.620541412546122e-05, "loss": 0.4944, "step": 3931 }, { "epoch": 0.8770912335489628, "grad_norm": 0.15954278409481049, "learning_rate": 1.6203568456393554e-05, "loss": 0.5158, "step": 3932 }, { "epoch": 0.8773142984608521, "grad_norm": 0.1770762950181961, "learning_rate": 1.620172244373324e-05, "loss": 0.4886, "step": 3933 }, { "epoch": 0.8775373633727415, "grad_norm": 0.16538506746292114, "learning_rate": 1.619987608758253e-05, "loss": 0.4563, "step": 3934 }, { "epoch": 0.8777604282846309, "grad_norm": 0.1733943372964859, "learning_rate": 1.6198029388043685e-05, "loss": 0.5131, "step": 3935 }, { "epoch": 0.8779834931965201, "grad_norm": 0.15919508039951324, "learning_rate": 1.619618234521898e-05, "loss": 0.4904, "step": 3936 }, { "epoch": 0.8782065581084095, "grad_norm": 0.1546212136745453, "learning_rate": 1.6194334959210726e-05, "loss": 0.4827, "step": 3937 }, { "epoch": 0.8784296230202989, "grad_norm": 0.16514717042446136, "learning_rate": 1.6192487230121236e-05, "loss": 0.501, "step": 3938 }, { "epoch": 0.8786526879321883, "grad_norm": 0.1485379934310913, "learning_rate": 1.6190639158052852e-05, "loss": 0.47, "step": 3939 }, { "epoch": 0.8788757528440776, "grad_norm": 0.1519124060869217, "learning_rate": 1.618879074310793e-05, "loss": 0.484, "step": 3940 }, { "epoch": 0.879098817755967, "grad_norm": 0.16282908618450165, "learning_rate": 1.618694198538885e-05, "loss": 0.5071, "step": 3941 }, { "epoch": 0.8793218826678564, "grad_norm": 0.16473978757858276, "learning_rate": 1.6185092884998e-05, "loss": 0.5203, "step": 3942 }, { "epoch": 0.8795449475797457, "grad_norm": 0.15753906965255737, "learning_rate": 1.6183243442037807e-05, "loss": 0.4989, "step": 3943 }, { "epoch": 0.879768012491635, "grad_norm": 0.1654537469148636, "learning_rate": 1.6181393656610693e-05, "loss": 0.4749, "step": 3944 }, { "epoch": 0.8799910774035244, "grad_norm": 0.17788641154766083, "learning_rate": 1.6179543528819116e-05, "loss": 0.4961, "step": 3945 }, { "epoch": 0.8802141423154138, "grad_norm": 0.16031092405319214, "learning_rate": 1.617769305876555e-05, "loss": 0.5006, "step": 3946 }, { "epoch": 0.8804372072273031, "grad_norm": 0.16216473281383514, "learning_rate": 1.6175842246552484e-05, "loss": 0.5012, "step": 3947 }, { "epoch": 0.8806602721391925, "grad_norm": 0.3736167848110199, "learning_rate": 1.6173991092282424e-05, "loss": 0.481, "step": 3948 }, { "epoch": 0.8808833370510819, "grad_norm": 0.16511203348636627, "learning_rate": 1.6172139596057902e-05, "loss": 0.4837, "step": 3949 }, { "epoch": 0.8811064019629712, "grad_norm": 0.16273178160190582, "learning_rate": 1.6170287757981468e-05, "loss": 0.494, "step": 3950 }, { "epoch": 0.8813294668748606, "grad_norm": 0.17184849083423615, "learning_rate": 1.616843557815568e-05, "loss": 0.5257, "step": 3951 }, { "epoch": 0.88155253178675, "grad_norm": 0.16385000944137573, "learning_rate": 1.6166583056683132e-05, "loss": 0.5086, "step": 3952 }, { "epoch": 0.8817755966986393, "grad_norm": 0.16628234088420868, "learning_rate": 1.6164730193666423e-05, "loss": 0.5035, "step": 3953 }, { "epoch": 0.8819986616105286, "grad_norm": 0.16098076105117798, "learning_rate": 1.616287698920818e-05, "loss": 0.4979, "step": 3954 }, { "epoch": 0.882221726522418, "grad_norm": 0.16197021305561066, "learning_rate": 1.6161023443411044e-05, "loss": 0.4883, "step": 3955 }, { "epoch": 0.8824447914343074, "grad_norm": 0.16163140535354614, "learning_rate": 1.6159169556377672e-05, "loss": 0.4899, "step": 3956 }, { "epoch": 0.8826678563461967, "grad_norm": 0.16022367775440216, "learning_rate": 1.615731532821075e-05, "loss": 0.4832, "step": 3957 }, { "epoch": 0.8828909212580861, "grad_norm": 0.16164539754390717, "learning_rate": 1.615546075901297e-05, "loss": 0.5016, "step": 3958 }, { "epoch": 0.8831139861699755, "grad_norm": 0.17649579048156738, "learning_rate": 1.615360584888706e-05, "loss": 0.4918, "step": 3959 }, { "epoch": 0.8833370510818648, "grad_norm": 0.16985541582107544, "learning_rate": 1.6151750597935746e-05, "loss": 0.4947, "step": 3960 }, { "epoch": 0.8835601159937542, "grad_norm": 0.1681845337152481, "learning_rate": 1.6149895006261788e-05, "loss": 0.5169, "step": 3961 }, { "epoch": 0.8837831809056436, "grad_norm": 0.1974424421787262, "learning_rate": 1.6148039073967964e-05, "loss": 0.5077, "step": 3962 }, { "epoch": 0.884006245817533, "grad_norm": 0.1644642949104309, "learning_rate": 1.614618280115706e-05, "loss": 0.4981, "step": 3963 }, { "epoch": 0.8842293107294222, "grad_norm": 0.16741390526294708, "learning_rate": 1.6144326187931893e-05, "loss": 0.4821, "step": 3964 }, { "epoch": 0.8844523756413116, "grad_norm": 0.17722152173519135, "learning_rate": 1.614246923439529e-05, "loss": 0.4782, "step": 3965 }, { "epoch": 0.884675440553201, "grad_norm": 0.17287255823612213, "learning_rate": 1.6140611940650104e-05, "loss": 0.4996, "step": 3966 }, { "epoch": 0.8848985054650903, "grad_norm": 0.17982237040996552, "learning_rate": 1.6138754306799206e-05, "loss": 0.4567, "step": 3967 }, { "epoch": 0.8851215703769797, "grad_norm": 0.1712944060564041, "learning_rate": 1.6136896332945474e-05, "loss": 0.4789, "step": 3968 }, { "epoch": 0.8853446352888691, "grad_norm": 0.16616348922252655, "learning_rate": 1.6135038019191823e-05, "loss": 0.509, "step": 3969 }, { "epoch": 0.8855677002007584, "grad_norm": 0.16956603527069092, "learning_rate": 1.6133179365641178e-05, "loss": 0.4906, "step": 3970 }, { "epoch": 0.8857907651126478, "grad_norm": 0.15731406211853027, "learning_rate": 1.613132037239648e-05, "loss": 0.4911, "step": 3971 }, { "epoch": 0.8860138300245372, "grad_norm": 0.16865961253643036, "learning_rate": 1.6129461039560693e-05, "loss": 0.5072, "step": 3972 }, { "epoch": 0.8862368949364265, "grad_norm": 0.16574735939502716, "learning_rate": 1.6127601367236793e-05, "loss": 0.4807, "step": 3973 }, { "epoch": 0.8864599598483158, "grad_norm": 0.16178300976753235, "learning_rate": 1.6125741355527788e-05, "loss": 0.5138, "step": 3974 }, { "epoch": 0.8866830247602052, "grad_norm": 0.163814976811409, "learning_rate": 1.6123881004536696e-05, "loss": 0.4999, "step": 3975 }, { "epoch": 0.8869060896720946, "grad_norm": 0.1648682951927185, "learning_rate": 1.612202031436655e-05, "loss": 0.4621, "step": 3976 }, { "epoch": 0.8871291545839839, "grad_norm": 0.1537158489227295, "learning_rate": 1.6120159285120417e-05, "loss": 0.4678, "step": 3977 }, { "epoch": 0.8873522194958733, "grad_norm": 0.17030583322048187, "learning_rate": 1.6118297916901357e-05, "loss": 0.5142, "step": 3978 }, { "epoch": 0.8875752844077627, "grad_norm": 0.15777969360351562, "learning_rate": 1.6116436209812476e-05, "loss": 0.4857, "step": 3979 }, { "epoch": 0.8877983493196521, "grad_norm": 0.16635335981845856, "learning_rate": 1.6114574163956883e-05, "loss": 0.4945, "step": 3980 }, { "epoch": 0.8880214142315414, "grad_norm": 0.16205914318561554, "learning_rate": 1.611271177943771e-05, "loss": 0.5013, "step": 3981 }, { "epoch": 0.8882444791434307, "grad_norm": 0.16518031060695648, "learning_rate": 1.6110849056358112e-05, "loss": 0.492, "step": 3982 }, { "epoch": 0.8884675440553201, "grad_norm": 0.17391857504844666, "learning_rate": 1.610898599482125e-05, "loss": 0.5147, "step": 3983 }, { "epoch": 0.8886906089672094, "grad_norm": 0.16679394245147705, "learning_rate": 1.610712259493032e-05, "loss": 0.5079, "step": 3984 }, { "epoch": 0.8889136738790988, "grad_norm": 0.15235210955142975, "learning_rate": 1.6105258856788525e-05, "loss": 0.4616, "step": 3985 }, { "epoch": 0.8891367387909882, "grad_norm": 0.15558315813541412, "learning_rate": 1.6103394780499088e-05, "loss": 0.5129, "step": 3986 }, { "epoch": 0.8893598037028775, "grad_norm": 0.16309772431850433, "learning_rate": 1.610153036616526e-05, "loss": 0.4981, "step": 3987 }, { "epoch": 0.8895828686147669, "grad_norm": 0.1611076146364212, "learning_rate": 1.60996656138903e-05, "loss": 0.4858, "step": 3988 }, { "epoch": 0.8898059335266563, "grad_norm": 0.16238349676132202, "learning_rate": 1.6097800523777487e-05, "loss": 0.5327, "step": 3989 }, { "epoch": 0.8900289984385457, "grad_norm": 0.1624278724193573, "learning_rate": 1.6095935095930125e-05, "loss": 0.4988, "step": 3990 }, { "epoch": 0.8902520633504349, "grad_norm": 0.16069476306438446, "learning_rate": 1.609406933045153e-05, "loss": 0.5061, "step": 3991 }, { "epoch": 0.8904751282623243, "grad_norm": 0.17323002219200134, "learning_rate": 1.6092203227445046e-05, "loss": 0.5126, "step": 3992 }, { "epoch": 0.8906981931742137, "grad_norm": 0.16106821596622467, "learning_rate": 1.6090336787014028e-05, "loss": 0.5075, "step": 3993 }, { "epoch": 0.890921258086103, "grad_norm": 0.20618608593940735, "learning_rate": 1.6088470009261846e-05, "loss": 0.4934, "step": 3994 }, { "epoch": 0.8911443229979924, "grad_norm": 0.15415968000888824, "learning_rate": 1.6086602894291895e-05, "loss": 0.4685, "step": 3995 }, { "epoch": 0.8913673879098818, "grad_norm": 0.16283094882965088, "learning_rate": 1.608473544220759e-05, "loss": 0.4893, "step": 3996 }, { "epoch": 0.8915904528217712, "grad_norm": 0.15671685338020325, "learning_rate": 1.6082867653112365e-05, "loss": 0.4866, "step": 3997 }, { "epoch": 0.8918135177336605, "grad_norm": 0.14689774811267853, "learning_rate": 1.6080999527109665e-05, "loss": 0.4596, "step": 3998 }, { "epoch": 0.8920365826455499, "grad_norm": 0.1579175591468811, "learning_rate": 1.6079131064302958e-05, "loss": 0.5135, "step": 3999 }, { "epoch": 0.8922596475574393, "grad_norm": 0.19000330567359924, "learning_rate": 1.6077262264795735e-05, "loss": 0.4955, "step": 4000 }, { "epoch": 0.8924827124693285, "grad_norm": 0.17095667123794556, "learning_rate": 1.6075393128691497e-05, "loss": 0.492, "step": 4001 }, { "epoch": 0.8927057773812179, "grad_norm": 0.1627168506383896, "learning_rate": 1.6073523656093778e-05, "loss": 0.4954, "step": 4002 }, { "epoch": 0.8929288422931073, "grad_norm": 0.16945117712020874, "learning_rate": 1.6071653847106113e-05, "loss": 0.5073, "step": 4003 }, { "epoch": 0.8931519072049967, "grad_norm": 0.16621045768260956, "learning_rate": 1.6069783701832066e-05, "loss": 0.509, "step": 4004 }, { "epoch": 0.893374972116886, "grad_norm": 0.16034524142742157, "learning_rate": 1.6067913220375216e-05, "loss": 0.4839, "step": 4005 }, { "epoch": 0.8935980370287754, "grad_norm": 0.15669786930084229, "learning_rate": 1.6066042402839163e-05, "loss": 0.4953, "step": 4006 }, { "epoch": 0.8938211019406648, "grad_norm": 0.15480557084083557, "learning_rate": 1.606417124932752e-05, "loss": 0.459, "step": 4007 }, { "epoch": 0.8940441668525541, "grad_norm": 0.15904684364795685, "learning_rate": 1.6062299759943938e-05, "loss": 0.4977, "step": 4008 }, { "epoch": 0.8942672317644434, "grad_norm": 0.16506803035736084, "learning_rate": 1.6060427934792056e-05, "loss": 0.4792, "step": 4009 }, { "epoch": 0.8944902966763328, "grad_norm": 0.16208292543888092, "learning_rate": 1.6058555773975552e-05, "loss": 0.5113, "step": 4010 }, { "epoch": 0.8947133615882221, "grad_norm": 0.15986153483390808, "learning_rate": 1.6056683277598123e-05, "loss": 0.4839, "step": 4011 }, { "epoch": 0.8949364265001115, "grad_norm": 0.17647914588451385, "learning_rate": 1.6054810445763474e-05, "loss": 0.4845, "step": 4012 }, { "epoch": 0.8951594914120009, "grad_norm": 0.16568545997142792, "learning_rate": 1.6052937278575338e-05, "loss": 0.5114, "step": 4013 }, { "epoch": 0.8953825563238903, "grad_norm": 0.16345763206481934, "learning_rate": 1.605106377613746e-05, "loss": 0.5004, "step": 4014 }, { "epoch": 0.8956056212357796, "grad_norm": 0.15674430131912231, "learning_rate": 1.6049189938553606e-05, "loss": 0.4745, "step": 4015 }, { "epoch": 0.895828686147669, "grad_norm": 0.25220704078674316, "learning_rate": 1.6047315765927566e-05, "loss": 0.4988, "step": 4016 }, { "epoch": 0.8960517510595584, "grad_norm": 0.16052506864070892, "learning_rate": 1.6045441258363138e-05, "loss": 0.468, "step": 4017 }, { "epoch": 0.8962748159714476, "grad_norm": 0.1760246306657791, "learning_rate": 1.6043566415964145e-05, "loss": 0.5213, "step": 4018 }, { "epoch": 0.896497880883337, "grad_norm": 0.16761453449726105, "learning_rate": 1.6041691238834426e-05, "loss": 0.5223, "step": 4019 }, { "epoch": 0.8967209457952264, "grad_norm": 0.1628294140100479, "learning_rate": 1.6039815727077845e-05, "loss": 0.4642, "step": 4020 }, { "epoch": 0.8969440107071158, "grad_norm": 0.16300807893276215, "learning_rate": 1.6037939880798277e-05, "loss": 0.4907, "step": 4021 }, { "epoch": 0.8971670756190051, "grad_norm": 0.16601742804050446, "learning_rate": 1.603606370009962e-05, "loss": 0.465, "step": 4022 }, { "epoch": 0.8973901405308945, "grad_norm": 0.16860070824623108, "learning_rate": 1.6034187185085783e-05, "loss": 0.4817, "step": 4023 }, { "epoch": 0.8976132054427839, "grad_norm": 0.16450795531272888, "learning_rate": 1.6032310335860706e-05, "loss": 0.4991, "step": 4024 }, { "epoch": 0.8978362703546732, "grad_norm": 0.18083292245864868, "learning_rate": 1.603043315252834e-05, "loss": 0.49, "step": 4025 }, { "epoch": 0.8980593352665626, "grad_norm": 0.16025912761688232, "learning_rate": 1.6028555635192648e-05, "loss": 0.492, "step": 4026 }, { "epoch": 0.898282400178452, "grad_norm": 0.17619994282722473, "learning_rate": 1.6026677783957626e-05, "loss": 0.5244, "step": 4027 }, { "epoch": 0.8985054650903412, "grad_norm": 0.16059327125549316, "learning_rate": 1.602479959892728e-05, "loss": 0.4875, "step": 4028 }, { "epoch": 0.8987285300022306, "grad_norm": 0.166759192943573, "learning_rate": 1.6022921080205634e-05, "loss": 0.4953, "step": 4029 }, { "epoch": 0.89895159491412, "grad_norm": 0.17170590162277222, "learning_rate": 1.602104222789673e-05, "loss": 0.5209, "step": 4030 }, { "epoch": 0.8991746598260094, "grad_norm": 0.17768704891204834, "learning_rate": 1.601916304210464e-05, "loss": 0.4861, "step": 4031 }, { "epoch": 0.8993977247378987, "grad_norm": 0.1798562854528427, "learning_rate": 1.6017283522933432e-05, "loss": 0.5049, "step": 4032 }, { "epoch": 0.8996207896497881, "grad_norm": 0.15688154101371765, "learning_rate": 1.6015403670487216e-05, "loss": 0.4527, "step": 4033 }, { "epoch": 0.8998438545616775, "grad_norm": 0.1658952832221985, "learning_rate": 1.6013523484870107e-05, "loss": 0.4687, "step": 4034 }, { "epoch": 0.9000669194735668, "grad_norm": 0.20666684210300446, "learning_rate": 1.6011642966186237e-05, "loss": 0.4883, "step": 4035 }, { "epoch": 0.9002899843854562, "grad_norm": 0.1706281453371048, "learning_rate": 1.600976211453977e-05, "loss": 0.4989, "step": 4036 }, { "epoch": 0.9005130492973455, "grad_norm": 0.16239850223064423, "learning_rate": 1.600788093003487e-05, "loss": 0.4944, "step": 4037 }, { "epoch": 0.9007361142092349, "grad_norm": 0.17332103848457336, "learning_rate": 1.6005999412775736e-05, "loss": 0.5225, "step": 4038 }, { "epoch": 0.9009591791211242, "grad_norm": 0.16437414288520813, "learning_rate": 1.600411756286657e-05, "loss": 0.5197, "step": 4039 }, { "epoch": 0.9011822440330136, "grad_norm": 0.1621236801147461, "learning_rate": 1.6002235380411614e-05, "loss": 0.4943, "step": 4040 }, { "epoch": 0.901405308944903, "grad_norm": 0.17484545707702637, "learning_rate": 1.60003528655151e-05, "loss": 0.5032, "step": 4041 }, { "epoch": 0.9016283738567923, "grad_norm": 0.16205301880836487, "learning_rate": 1.5998470018281303e-05, "loss": 0.4848, "step": 4042 }, { "epoch": 0.9018514387686817, "grad_norm": 0.15417253971099854, "learning_rate": 1.5996586838814505e-05, "loss": 0.4656, "step": 4043 }, { "epoch": 0.9020745036805711, "grad_norm": 0.15824946761131287, "learning_rate": 1.5994703327219008e-05, "loss": 0.4926, "step": 4044 }, { "epoch": 0.9022975685924604, "grad_norm": 0.17363391816616058, "learning_rate": 1.5992819483599132e-05, "loss": 0.5254, "step": 4045 }, { "epoch": 0.9025206335043497, "grad_norm": 0.16660279035568237, "learning_rate": 1.599093530805922e-05, "loss": 0.5396, "step": 4046 }, { "epoch": 0.9027436984162391, "grad_norm": 0.16047632694244385, "learning_rate": 1.5989050800703622e-05, "loss": 0.4782, "step": 4047 }, { "epoch": 0.9029667633281285, "grad_norm": 0.1667635589838028, "learning_rate": 1.5987165961636718e-05, "loss": 0.5084, "step": 4048 }, { "epoch": 0.9031898282400178, "grad_norm": 0.16391973197460175, "learning_rate": 1.5985280790962903e-05, "loss": 0.4967, "step": 4049 }, { "epoch": 0.9034128931519072, "grad_norm": 0.15799559652805328, "learning_rate": 1.598339528878659e-05, "loss": 0.4929, "step": 4050 }, { "epoch": 0.9036359580637966, "grad_norm": 0.15577222406864166, "learning_rate": 1.5981509455212207e-05, "loss": 0.4766, "step": 4051 }, { "epoch": 0.9038590229756859, "grad_norm": 0.15642912685871124, "learning_rate": 1.5979623290344207e-05, "loss": 0.4714, "step": 4052 }, { "epoch": 0.9040820878875753, "grad_norm": 0.1563492864370346, "learning_rate": 1.5977736794287057e-05, "loss": 0.4891, "step": 4053 }, { "epoch": 0.9043051527994647, "grad_norm": 0.16570942103862762, "learning_rate": 1.597584996714524e-05, "loss": 0.4857, "step": 4054 }, { "epoch": 0.904528217711354, "grad_norm": 0.16158944368362427, "learning_rate": 1.5973962809023258e-05, "loss": 0.487, "step": 4055 }, { "epoch": 0.9047512826232433, "grad_norm": 0.16446934640407562, "learning_rate": 1.5972075320025643e-05, "loss": 0.5012, "step": 4056 }, { "epoch": 0.9049743475351327, "grad_norm": 0.20039531588554382, "learning_rate": 1.597018750025693e-05, "loss": 0.5092, "step": 4057 }, { "epoch": 0.9051974124470221, "grad_norm": 0.16635029017925262, "learning_rate": 1.5968299349821678e-05, "loss": 0.5425, "step": 4058 }, { "epoch": 0.9054204773589114, "grad_norm": 0.162612184882164, "learning_rate": 1.596641086882447e-05, "loss": 0.4561, "step": 4059 }, { "epoch": 0.9056435422708008, "grad_norm": 0.15874888002872467, "learning_rate": 1.5964522057369897e-05, "loss": 0.503, "step": 4060 }, { "epoch": 0.9058666071826902, "grad_norm": 0.16984137892723083, "learning_rate": 1.596263291556257e-05, "loss": 0.4931, "step": 4061 }, { "epoch": 0.9060896720945795, "grad_norm": 0.15440818667411804, "learning_rate": 1.5960743443507128e-05, "loss": 0.4834, "step": 4062 }, { "epoch": 0.9063127370064689, "grad_norm": 0.17957209050655365, "learning_rate": 1.595885364130822e-05, "loss": 0.5049, "step": 4063 }, { "epoch": 0.9065358019183583, "grad_norm": 0.15549324452877045, "learning_rate": 1.5956963509070513e-05, "loss": 0.4733, "step": 4064 }, { "epoch": 0.9067588668302476, "grad_norm": 0.1653992384672165, "learning_rate": 1.59550730468987e-05, "loss": 0.4925, "step": 4065 }, { "epoch": 0.9069819317421369, "grad_norm": 0.1693269908428192, "learning_rate": 1.5953182254897478e-05, "loss": 0.4908, "step": 4066 }, { "epoch": 0.9072049966540263, "grad_norm": 0.16473425924777985, "learning_rate": 1.5951291133171577e-05, "loss": 0.4741, "step": 4067 }, { "epoch": 0.9074280615659157, "grad_norm": 0.16339989006519318, "learning_rate": 1.5949399681825738e-05, "loss": 0.4574, "step": 4068 }, { "epoch": 0.907651126477805, "grad_norm": 0.16389763355255127, "learning_rate": 1.5947507900964723e-05, "loss": 0.4951, "step": 4069 }, { "epoch": 0.9078741913896944, "grad_norm": 0.16346803307533264, "learning_rate": 1.594561579069331e-05, "loss": 0.4794, "step": 4070 }, { "epoch": 0.9080972563015838, "grad_norm": 0.16627097129821777, "learning_rate": 1.5943723351116293e-05, "loss": 0.4726, "step": 4071 }, { "epoch": 0.9083203212134732, "grad_norm": 0.17151188850402832, "learning_rate": 1.5941830582338488e-05, "loss": 0.5246, "step": 4072 }, { "epoch": 0.9085433861253625, "grad_norm": 0.15683230757713318, "learning_rate": 1.593993748446473e-05, "loss": 0.4771, "step": 4073 }, { "epoch": 0.9087664510372518, "grad_norm": 0.1879899501800537, "learning_rate": 1.5938044057599873e-05, "loss": 0.5102, "step": 4074 }, { "epoch": 0.9089895159491412, "grad_norm": 0.16234296560287476, "learning_rate": 1.593615030184878e-05, "loss": 0.4906, "step": 4075 }, { "epoch": 0.9092125808610305, "grad_norm": 0.15749451518058777, "learning_rate": 1.593425621731635e-05, "loss": 0.4867, "step": 4076 }, { "epoch": 0.9094356457729199, "grad_norm": 0.1519862711429596, "learning_rate": 1.593236180410748e-05, "loss": 0.4644, "step": 4077 }, { "epoch": 0.9096587106848093, "grad_norm": 0.20612524449825287, "learning_rate": 1.5930467062327096e-05, "loss": 0.5017, "step": 4078 }, { "epoch": 0.9098817755966987, "grad_norm": 0.16134196519851685, "learning_rate": 1.5928571992080142e-05, "loss": 0.4801, "step": 4079 }, { "epoch": 0.910104840508588, "grad_norm": 0.15148447453975677, "learning_rate": 1.592667659347158e-05, "loss": 0.4697, "step": 4080 }, { "epoch": 0.9103279054204774, "grad_norm": 0.1598382443189621, "learning_rate": 1.5924780866606387e-05, "loss": 0.4976, "step": 4081 }, { "epoch": 0.9105509703323668, "grad_norm": 0.15935340523719788, "learning_rate": 1.592288481158956e-05, "loss": 0.4806, "step": 4082 }, { "epoch": 0.910774035244256, "grad_norm": 0.16446979343891144, "learning_rate": 1.5920988428526117e-05, "loss": 0.4966, "step": 4083 }, { "epoch": 0.9109971001561454, "grad_norm": 0.15920083224773407, "learning_rate": 1.591909171752109e-05, "loss": 0.4917, "step": 4084 }, { "epoch": 0.9112201650680348, "grad_norm": 0.16009603440761566, "learning_rate": 1.5917194678679532e-05, "loss": 0.5169, "step": 4085 }, { "epoch": 0.9114432299799241, "grad_norm": 0.17751926183700562, "learning_rate": 1.5915297312106513e-05, "loss": 0.4804, "step": 4086 }, { "epoch": 0.9116662948918135, "grad_norm": 0.18244434893131256, "learning_rate": 1.5913399617907116e-05, "loss": 0.487, "step": 4087 }, { "epoch": 0.9118893598037029, "grad_norm": 0.15786734223365784, "learning_rate": 1.5911501596186455e-05, "loss": 0.4986, "step": 4088 }, { "epoch": 0.9121124247155923, "grad_norm": 0.15740512311458588, "learning_rate": 1.5909603247049654e-05, "loss": 0.4895, "step": 4089 }, { "epoch": 0.9123354896274816, "grad_norm": 0.1518784463405609, "learning_rate": 1.5907704570601845e-05, "loss": 0.4522, "step": 4090 }, { "epoch": 0.912558554539371, "grad_norm": 0.1830586940050125, "learning_rate": 1.59058055669482e-05, "loss": 0.513, "step": 4091 }, { "epoch": 0.9127816194512604, "grad_norm": 0.16128897666931152, "learning_rate": 1.5903906236193892e-05, "loss": 0.4706, "step": 4092 }, { "epoch": 0.9130046843631496, "grad_norm": 0.17136983573436737, "learning_rate": 1.5902006578444123e-05, "loss": 0.4867, "step": 4093 }, { "epoch": 0.913227749275039, "grad_norm": 0.15806463360786438, "learning_rate": 1.59001065938041e-05, "loss": 0.5066, "step": 4094 }, { "epoch": 0.9134508141869284, "grad_norm": 0.17267145216464996, "learning_rate": 1.5898206282379063e-05, "loss": 0.4805, "step": 4095 }, { "epoch": 0.9136738790988178, "grad_norm": 0.15600056946277618, "learning_rate": 1.5896305644274262e-05, "loss": 0.4865, "step": 4096 }, { "epoch": 0.9138969440107071, "grad_norm": 0.1608215868473053, "learning_rate": 1.5894404679594963e-05, "loss": 0.5102, "step": 4097 }, { "epoch": 0.9141200089225965, "grad_norm": 0.15523095428943634, "learning_rate": 1.5892503388446456e-05, "loss": 0.4642, "step": 4098 }, { "epoch": 0.9143430738344859, "grad_norm": 0.16346514225006104, "learning_rate": 1.589060177093405e-05, "loss": 0.4655, "step": 4099 }, { "epoch": 0.9145661387463752, "grad_norm": 0.1508352905511856, "learning_rate": 1.588869982716306e-05, "loss": 0.4826, "step": 4100 }, { "epoch": 0.9147892036582645, "grad_norm": 0.1622077375650406, "learning_rate": 1.5886797557238832e-05, "loss": 0.4876, "step": 4101 }, { "epoch": 0.9150122685701539, "grad_norm": 0.17161275446414948, "learning_rate": 1.588489496126673e-05, "loss": 0.4963, "step": 4102 }, { "epoch": 0.9152353334820432, "grad_norm": 0.15936551988124847, "learning_rate": 1.5882992039352122e-05, "loss": 0.4768, "step": 4103 }, { "epoch": 0.9154583983939326, "grad_norm": 0.1664397269487381, "learning_rate": 1.588108879160041e-05, "loss": 0.4927, "step": 4104 }, { "epoch": 0.915681463305822, "grad_norm": 0.16500705480575562, "learning_rate": 1.5879185218117012e-05, "loss": 0.4909, "step": 4105 }, { "epoch": 0.9159045282177114, "grad_norm": 0.16480299830436707, "learning_rate": 1.5877281319007352e-05, "loss": 0.468, "step": 4106 }, { "epoch": 0.9161275931296007, "grad_norm": 0.17092856764793396, "learning_rate": 1.5875377094376883e-05, "loss": 0.5188, "step": 4107 }, { "epoch": 0.9163506580414901, "grad_norm": 0.16929516196250916, "learning_rate": 1.5873472544331073e-05, "loss": 0.4932, "step": 4108 }, { "epoch": 0.9165737229533795, "grad_norm": 0.1531393676996231, "learning_rate": 1.5871567668975406e-05, "loss": 0.4512, "step": 4109 }, { "epoch": 0.9167967878652687, "grad_norm": 0.15918409824371338, "learning_rate": 1.586966246841539e-05, "loss": 0.5019, "step": 4110 }, { "epoch": 0.9170198527771581, "grad_norm": 0.19668515026569366, "learning_rate": 1.5867756942756548e-05, "loss": 0.5106, "step": 4111 }, { "epoch": 0.9172429176890475, "grad_norm": 0.15807564556598663, "learning_rate": 1.5865851092104414e-05, "loss": 0.4738, "step": 4112 }, { "epoch": 0.9174659826009369, "grad_norm": 0.16922055184841156, "learning_rate": 1.586394491656455e-05, "loss": 0.5086, "step": 4113 }, { "epoch": 0.9176890475128262, "grad_norm": 0.18552662432193756, "learning_rate": 1.586203841624253e-05, "loss": 0.5246, "step": 4114 }, { "epoch": 0.9179121124247156, "grad_norm": 0.16202445328235626, "learning_rate": 1.5860131591243945e-05, "loss": 0.5113, "step": 4115 }, { "epoch": 0.918135177336605, "grad_norm": 0.16352488100528717, "learning_rate": 1.5858224441674416e-05, "loss": 0.4739, "step": 4116 }, { "epoch": 0.9183582422484943, "grad_norm": 0.16634705662727356, "learning_rate": 1.5856316967639566e-05, "loss": 0.5221, "step": 4117 }, { "epoch": 0.9185813071603837, "grad_norm": 0.16568778455257416, "learning_rate": 1.5854409169245043e-05, "loss": 0.4476, "step": 4118 }, { "epoch": 0.9188043720722731, "grad_norm": 0.160440593957901, "learning_rate": 1.5852501046596516e-05, "loss": 0.5054, "step": 4119 }, { "epoch": 0.9190274369841623, "grad_norm": 0.1603170782327652, "learning_rate": 1.5850592599799668e-05, "loss": 0.4947, "step": 4120 }, { "epoch": 0.9192505018960517, "grad_norm": 0.16584816575050354, "learning_rate": 1.5848683828960195e-05, "loss": 0.5014, "step": 4121 }, { "epoch": 0.9194735668079411, "grad_norm": 0.1588207632303238, "learning_rate": 1.584677473418383e-05, "loss": 0.4973, "step": 4122 }, { "epoch": 0.9196966317198305, "grad_norm": 0.1574103981256485, "learning_rate": 1.5844865315576296e-05, "loss": 0.5214, "step": 4123 }, { "epoch": 0.9199196966317198, "grad_norm": 0.17080260813236237, "learning_rate": 1.584295557324336e-05, "loss": 0.5067, "step": 4124 }, { "epoch": 0.9201427615436092, "grad_norm": 0.15937574207782745, "learning_rate": 1.584104550729079e-05, "loss": 0.4971, "step": 4125 }, { "epoch": 0.9203658264554986, "grad_norm": 0.15593832731246948, "learning_rate": 1.5839135117824375e-05, "loss": 0.4844, "step": 4126 }, { "epoch": 0.9205888913673879, "grad_norm": 0.18794505298137665, "learning_rate": 1.583722440494993e-05, "loss": 0.5135, "step": 4127 }, { "epoch": 0.9208119562792773, "grad_norm": 0.183371901512146, "learning_rate": 1.5835313368773276e-05, "loss": 0.4861, "step": 4128 }, { "epoch": 0.9210350211911666, "grad_norm": 0.153816357254982, "learning_rate": 1.583340200940027e-05, "loss": 0.4921, "step": 4129 }, { "epoch": 0.921258086103056, "grad_norm": 0.16074557602405548, "learning_rate": 1.583149032693676e-05, "loss": 0.4566, "step": 4130 }, { "epoch": 0.9214811510149453, "grad_norm": 0.15913258492946625, "learning_rate": 1.5829578321488636e-05, "loss": 0.4838, "step": 4131 }, { "epoch": 0.9217042159268347, "grad_norm": 0.15266257524490356, "learning_rate": 1.58276659931618e-05, "loss": 0.4704, "step": 4132 }, { "epoch": 0.9219272808387241, "grad_norm": 0.15182676911354065, "learning_rate": 1.5825753342062155e-05, "loss": 0.4825, "step": 4133 }, { "epoch": 0.9221503457506134, "grad_norm": 0.15525878965854645, "learning_rate": 1.582384036829565e-05, "loss": 0.4636, "step": 4134 }, { "epoch": 0.9223734106625028, "grad_norm": 0.15554243326187134, "learning_rate": 1.582192707196823e-05, "loss": 0.4945, "step": 4135 }, { "epoch": 0.9225964755743922, "grad_norm": 0.15928377211093903, "learning_rate": 1.582001345318587e-05, "loss": 0.4772, "step": 4136 }, { "epoch": 0.9228195404862815, "grad_norm": 0.15914230048656464, "learning_rate": 1.581809951205455e-05, "loss": 0.4723, "step": 4137 }, { "epoch": 0.9230426053981708, "grad_norm": 0.16770337522029877, "learning_rate": 1.581618524868029e-05, "loss": 0.4579, "step": 4138 }, { "epoch": 0.9232656703100602, "grad_norm": 0.16558465361595154, "learning_rate": 1.58142706631691e-05, "loss": 0.5186, "step": 4139 }, { "epoch": 0.9234887352219496, "grad_norm": 0.16763773560523987, "learning_rate": 1.5812355755627028e-05, "loss": 0.4887, "step": 4140 }, { "epoch": 0.9237118001338389, "grad_norm": 0.1562553197145462, "learning_rate": 1.5810440526160133e-05, "loss": 0.4953, "step": 4141 }, { "epoch": 0.9239348650457283, "grad_norm": 0.1571418195962906, "learning_rate": 1.5808524974874493e-05, "loss": 0.4753, "step": 4142 }, { "epoch": 0.9241579299576177, "grad_norm": 0.17362381517887115, "learning_rate": 1.5806609101876203e-05, "loss": 0.4704, "step": 4143 }, { "epoch": 0.924380994869507, "grad_norm": 0.1619691401720047, "learning_rate": 1.580469290727138e-05, "loss": 0.476, "step": 4144 }, { "epoch": 0.9246040597813964, "grad_norm": 0.16508683562278748, "learning_rate": 1.5802776391166146e-05, "loss": 0.5118, "step": 4145 }, { "epoch": 0.9248271246932858, "grad_norm": 0.16421370208263397, "learning_rate": 1.5800859553666655e-05, "loss": 0.4876, "step": 4146 }, { "epoch": 0.9250501896051752, "grad_norm": 0.16035978496074677, "learning_rate": 1.5798942394879073e-05, "loss": 0.5048, "step": 4147 }, { "epoch": 0.9252732545170644, "grad_norm": 0.16965007781982422, "learning_rate": 1.5797024914909584e-05, "loss": 0.4932, "step": 4148 }, { "epoch": 0.9254963194289538, "grad_norm": 0.16695380210876465, "learning_rate": 1.5795107113864393e-05, "loss": 0.5188, "step": 4149 }, { "epoch": 0.9257193843408432, "grad_norm": 0.1638556569814682, "learning_rate": 1.5793188991849717e-05, "loss": 0.4764, "step": 4150 }, { "epoch": 0.9259424492527325, "grad_norm": 0.16190040111541748, "learning_rate": 1.579127054897179e-05, "loss": 0.4877, "step": 4151 }, { "epoch": 0.9261655141646219, "grad_norm": 0.16219079494476318, "learning_rate": 1.5789351785336874e-05, "loss": 0.4823, "step": 4152 }, { "epoch": 0.9263885790765113, "grad_norm": 0.15557697415351868, "learning_rate": 1.5787432701051242e-05, "loss": 0.4826, "step": 4153 }, { "epoch": 0.9266116439884007, "grad_norm": 0.15617480874061584, "learning_rate": 1.578551329622118e-05, "loss": 0.5012, "step": 4154 }, { "epoch": 0.92683470890029, "grad_norm": 0.16417647898197174, "learning_rate": 1.5783593570953e-05, "loss": 0.4692, "step": 4155 }, { "epoch": 0.9270577738121794, "grad_norm": 0.16066023707389832, "learning_rate": 1.578167352535303e-05, "loss": 0.4932, "step": 4156 }, { "epoch": 0.9272808387240687, "grad_norm": 0.15020349621772766, "learning_rate": 1.577975315952761e-05, "loss": 0.4593, "step": 4157 }, { "epoch": 0.927503903635958, "grad_norm": 0.15949909389019012, "learning_rate": 1.57778324735831e-05, "loss": 0.4952, "step": 4158 }, { "epoch": 0.9277269685478474, "grad_norm": 0.169538676738739, "learning_rate": 1.577591146762589e-05, "loss": 0.4989, "step": 4159 }, { "epoch": 0.9279500334597368, "grad_norm": 0.1595795601606369, "learning_rate": 1.5773990141762366e-05, "loss": 0.4886, "step": 4160 }, { "epoch": 0.9281730983716261, "grad_norm": 0.17213213443756104, "learning_rate": 1.577206849609895e-05, "loss": 0.4383, "step": 4161 }, { "epoch": 0.9283961632835155, "grad_norm": 0.15668156743049622, "learning_rate": 1.5770146530742075e-05, "loss": 0.4672, "step": 4162 }, { "epoch": 0.9286192281954049, "grad_norm": 0.15910851955413818, "learning_rate": 1.576822424579819e-05, "loss": 0.5136, "step": 4163 }, { "epoch": 0.9288422931072943, "grad_norm": 0.1524379998445511, "learning_rate": 1.5766301641373755e-05, "loss": 0.4752, "step": 4164 }, { "epoch": 0.9290653580191836, "grad_norm": 0.1620722860097885, "learning_rate": 1.5764378717575272e-05, "loss": 0.46, "step": 4165 }, { "epoch": 0.9292884229310729, "grad_norm": 0.16785915195941925, "learning_rate": 1.576245547450923e-05, "loss": 0.5097, "step": 4166 }, { "epoch": 0.9295114878429623, "grad_norm": 0.15819083154201508, "learning_rate": 1.5760531912282163e-05, "loss": 0.5042, "step": 4167 }, { "epoch": 0.9297345527548516, "grad_norm": 0.15982572734355927, "learning_rate": 1.57586080310006e-05, "loss": 0.4832, "step": 4168 }, { "epoch": 0.929957617666741, "grad_norm": 0.16882368922233582, "learning_rate": 1.57566838307711e-05, "loss": 0.495, "step": 4169 }, { "epoch": 0.9301806825786304, "grad_norm": 0.15450328588485718, "learning_rate": 1.575475931170024e-05, "loss": 0.4831, "step": 4170 }, { "epoch": 0.9304037474905198, "grad_norm": 0.15884611010551453, "learning_rate": 1.575283447389461e-05, "loss": 0.4447, "step": 4171 }, { "epoch": 0.9306268124024091, "grad_norm": 0.16536371409893036, "learning_rate": 1.575090931746082e-05, "loss": 0.4708, "step": 4172 }, { "epoch": 0.9308498773142985, "grad_norm": 0.16749803721904755, "learning_rate": 1.57489838425055e-05, "loss": 0.4927, "step": 4173 }, { "epoch": 0.9310729422261879, "grad_norm": 0.17306271195411682, "learning_rate": 1.5747058049135286e-05, "loss": 0.5355, "step": 4174 }, { "epoch": 0.9312960071380771, "grad_norm": 0.1781659871339798, "learning_rate": 1.5745131937456853e-05, "loss": 0.5054, "step": 4175 }, { "epoch": 0.9315190720499665, "grad_norm": 0.16959606111049652, "learning_rate": 1.5743205507576873e-05, "loss": 0.4799, "step": 4176 }, { "epoch": 0.9317421369618559, "grad_norm": 0.16488516330718994, "learning_rate": 1.5741278759602045e-05, "loss": 0.4835, "step": 4177 }, { "epoch": 0.9319652018737452, "grad_norm": 0.1639404445886612, "learning_rate": 1.5739351693639085e-05, "loss": 0.4757, "step": 4178 }, { "epoch": 0.9321882667856346, "grad_norm": 0.17086337506771088, "learning_rate": 1.573742430979473e-05, "loss": 0.4982, "step": 4179 }, { "epoch": 0.932411331697524, "grad_norm": 0.162103071808815, "learning_rate": 1.5735496608175722e-05, "loss": 0.4593, "step": 4180 }, { "epoch": 0.9326343966094134, "grad_norm": 0.1637817919254303, "learning_rate": 1.5733568588888835e-05, "loss": 0.4776, "step": 4181 }, { "epoch": 0.9328574615213027, "grad_norm": 0.1821950078010559, "learning_rate": 1.5731640252040857e-05, "loss": 0.5159, "step": 4182 }, { "epoch": 0.9330805264331921, "grad_norm": 0.21807077527046204, "learning_rate": 1.5729711597738587e-05, "loss": 0.4721, "step": 4183 }, { "epoch": 0.9333035913450815, "grad_norm": 0.1628894805908203, "learning_rate": 1.5727782626088844e-05, "loss": 0.5033, "step": 4184 }, { "epoch": 0.9335266562569707, "grad_norm": 0.158598393201828, "learning_rate": 1.5725853337198476e-05, "loss": 0.4743, "step": 4185 }, { "epoch": 0.9337497211688601, "grad_norm": 0.16272245347499847, "learning_rate": 1.5723923731174327e-05, "loss": 0.4952, "step": 4186 }, { "epoch": 0.9339727860807495, "grad_norm": 0.16067437827587128, "learning_rate": 1.5721993808123283e-05, "loss": 0.4879, "step": 4187 }, { "epoch": 0.9341958509926389, "grad_norm": 0.16409751772880554, "learning_rate": 1.5720063568152222e-05, "loss": 0.5051, "step": 4188 }, { "epoch": 0.9344189159045282, "grad_norm": 0.15659625828266144, "learning_rate": 1.5718133011368065e-05, "loss": 0.4497, "step": 4189 }, { "epoch": 0.9346419808164176, "grad_norm": 0.17328283190727234, "learning_rate": 1.5716202137877732e-05, "loss": 0.4927, "step": 4190 }, { "epoch": 0.934865045728307, "grad_norm": 0.15970586240291595, "learning_rate": 1.5714270947788168e-05, "loss": 0.4706, "step": 4191 }, { "epoch": 0.9350881106401963, "grad_norm": 0.16526605188846588, "learning_rate": 1.5712339441206335e-05, "loss": 0.5266, "step": 4192 }, { "epoch": 0.9353111755520856, "grad_norm": 0.16756314039230347, "learning_rate": 1.5710407618239215e-05, "loss": 0.4818, "step": 4193 }, { "epoch": 0.935534240463975, "grad_norm": 0.16548283398151398, "learning_rate": 1.57084754789938e-05, "loss": 0.4586, "step": 4194 }, { "epoch": 0.9357573053758643, "grad_norm": 0.1704016625881195, "learning_rate": 1.57065430235771e-05, "loss": 0.4999, "step": 4195 }, { "epoch": 0.9359803702877537, "grad_norm": 0.16027678549289703, "learning_rate": 1.5704610252096158e-05, "loss": 0.5155, "step": 4196 }, { "epoch": 0.9362034351996431, "grad_norm": 0.16171659529209137, "learning_rate": 1.5702677164658013e-05, "loss": 0.482, "step": 4197 }, { "epoch": 0.9364265001115325, "grad_norm": 0.15903575718402863, "learning_rate": 1.5700743761369735e-05, "loss": 0.4862, "step": 4198 }, { "epoch": 0.9366495650234218, "grad_norm": 0.16277752816677094, "learning_rate": 1.569881004233841e-05, "loss": 0.5011, "step": 4199 }, { "epoch": 0.9368726299353112, "grad_norm": 0.18112027645111084, "learning_rate": 1.5696876007671137e-05, "loss": 0.5124, "step": 4200 }, { "epoch": 0.9370956948472006, "grad_norm": 0.1702015995979309, "learning_rate": 1.5694941657475037e-05, "loss": 0.5233, "step": 4201 }, { "epoch": 0.9373187597590898, "grad_norm": 0.15752531588077545, "learning_rate": 1.5693006991857248e-05, "loss": 0.486, "step": 4202 }, { "epoch": 0.9375418246709792, "grad_norm": 0.16015774011611938, "learning_rate": 1.5691072010924915e-05, "loss": 0.4816, "step": 4203 }, { "epoch": 0.9377648895828686, "grad_norm": 0.17005962133407593, "learning_rate": 1.568913671478522e-05, "loss": 0.4986, "step": 4204 }, { "epoch": 0.937987954494758, "grad_norm": 0.15905898809432983, "learning_rate": 1.5687201103545343e-05, "loss": 0.5031, "step": 4205 }, { "epoch": 0.9382110194066473, "grad_norm": 0.16087795794010162, "learning_rate": 1.56852651773125e-05, "loss": 0.4957, "step": 4206 }, { "epoch": 0.9384340843185367, "grad_norm": 0.16135592758655548, "learning_rate": 1.5683328936193908e-05, "loss": 0.5094, "step": 4207 }, { "epoch": 0.9386571492304261, "grad_norm": 0.18367910385131836, "learning_rate": 1.568139238029681e-05, "loss": 0.515, "step": 4208 }, { "epoch": 0.9388802141423154, "grad_norm": 0.16034136712551117, "learning_rate": 1.5679455509728468e-05, "loss": 0.5168, "step": 4209 }, { "epoch": 0.9391032790542048, "grad_norm": 0.17193713784217834, "learning_rate": 1.567751832459615e-05, "loss": 0.5078, "step": 4210 }, { "epoch": 0.9393263439660942, "grad_norm": 0.15431612730026245, "learning_rate": 1.5675580825007158e-05, "loss": 0.4718, "step": 4211 }, { "epoch": 0.9395494088779834, "grad_norm": 0.16114309430122375, "learning_rate": 1.5673643011068796e-05, "loss": 0.4779, "step": 4212 }, { "epoch": 0.9397724737898728, "grad_norm": 0.15799474716186523, "learning_rate": 1.5671704882888396e-05, "loss": 0.5086, "step": 4213 }, { "epoch": 0.9399955387017622, "grad_norm": 0.1653827577829361, "learning_rate": 1.5669766440573302e-05, "loss": 0.488, "step": 4214 }, { "epoch": 0.9402186036136516, "grad_norm": 0.16851937770843506, "learning_rate": 1.566782768423088e-05, "loss": 0.4806, "step": 4215 }, { "epoch": 0.9404416685255409, "grad_norm": 0.17142115533351898, "learning_rate": 1.566588861396851e-05, "loss": 0.5131, "step": 4216 }, { "epoch": 0.9406647334374303, "grad_norm": 0.15409086644649506, "learning_rate": 1.5663949229893587e-05, "loss": 0.4624, "step": 4217 }, { "epoch": 0.9408877983493197, "grad_norm": 0.16042175889015198, "learning_rate": 1.566200953211353e-05, "loss": 0.4816, "step": 4218 }, { "epoch": 0.941110863261209, "grad_norm": 0.15703439712524414, "learning_rate": 1.5660069520735766e-05, "loss": 0.504, "step": 4219 }, { "epoch": 0.9413339281730984, "grad_norm": 0.16937057673931122, "learning_rate": 1.565812919586775e-05, "loss": 0.4766, "step": 4220 }, { "epoch": 0.9415569930849877, "grad_norm": 0.17867791652679443, "learning_rate": 1.565618855761695e-05, "loss": 0.4872, "step": 4221 }, { "epoch": 0.9417800579968771, "grad_norm": 0.15998175740242004, "learning_rate": 1.5654247606090846e-05, "loss": 0.4844, "step": 4222 }, { "epoch": 0.9420031229087664, "grad_norm": 0.15649773180484772, "learning_rate": 1.5652306341396943e-05, "loss": 0.4774, "step": 4223 }, { "epoch": 0.9422261878206558, "grad_norm": 0.15873044729232788, "learning_rate": 1.5650364763642764e-05, "loss": 0.5239, "step": 4224 }, { "epoch": 0.9424492527325452, "grad_norm": 0.15709929168224335, "learning_rate": 1.564842287293584e-05, "loss": 0.4875, "step": 4225 }, { "epoch": 0.9426723176444345, "grad_norm": 0.1562758833169937, "learning_rate": 1.5646480669383726e-05, "loss": 0.5132, "step": 4226 }, { "epoch": 0.9428953825563239, "grad_norm": 0.162166565656662, "learning_rate": 1.5644538153093995e-05, "loss": 0.4901, "step": 4227 }, { "epoch": 0.9431184474682133, "grad_norm": 0.1618107706308365, "learning_rate": 1.564259532417424e-05, "loss": 0.4722, "step": 4228 }, { "epoch": 0.9433415123801027, "grad_norm": 0.15450115501880646, "learning_rate": 1.5640652182732057e-05, "loss": 0.4498, "step": 4229 }, { "epoch": 0.943564577291992, "grad_norm": 0.1606828272342682, "learning_rate": 1.563870872887508e-05, "loss": 0.4798, "step": 4230 }, { "epoch": 0.9437876422038813, "grad_norm": 0.15939275920391083, "learning_rate": 1.5636764962710936e-05, "loss": 0.4773, "step": 4231 }, { "epoch": 0.9440107071157707, "grad_norm": 0.16131429374217987, "learning_rate": 1.5634820884347303e-05, "loss": 0.4861, "step": 4232 }, { "epoch": 0.94423377202766, "grad_norm": 0.15643377602100372, "learning_rate": 1.563287649389184e-05, "loss": 0.5172, "step": 4233 }, { "epoch": 0.9444568369395494, "grad_norm": 0.1604384183883667, "learning_rate": 1.5630931791452246e-05, "loss": 0.4808, "step": 4234 }, { "epoch": 0.9446799018514388, "grad_norm": 0.15961483120918274, "learning_rate": 1.5628986777136223e-05, "loss": 0.4648, "step": 4235 }, { "epoch": 0.9449029667633281, "grad_norm": 0.16450265049934387, "learning_rate": 1.562704145105151e-05, "loss": 0.5006, "step": 4236 }, { "epoch": 0.9451260316752175, "grad_norm": 0.1608218103647232, "learning_rate": 1.5625095813305847e-05, "loss": 0.4983, "step": 4237 }, { "epoch": 0.9453490965871069, "grad_norm": 0.1569865345954895, "learning_rate": 1.5623149864006993e-05, "loss": 0.4969, "step": 4238 }, { "epoch": 0.9455721614989963, "grad_norm": 0.1610192060470581, "learning_rate": 1.5621203603262727e-05, "loss": 0.4842, "step": 4239 }, { "epoch": 0.9457952264108855, "grad_norm": 0.18263404071331024, "learning_rate": 1.561925703118085e-05, "loss": 0.5053, "step": 4240 }, { "epoch": 0.9460182913227749, "grad_norm": 0.16298796236515045, "learning_rate": 1.561731014786917e-05, "loss": 0.4906, "step": 4241 }, { "epoch": 0.9462413562346643, "grad_norm": 0.16036361455917358, "learning_rate": 1.5615362953435517e-05, "loss": 0.4647, "step": 4242 }, { "epoch": 0.9464644211465536, "grad_norm": 0.16408760845661163, "learning_rate": 1.5613415447987743e-05, "loss": 0.4836, "step": 4243 }, { "epoch": 0.946687486058443, "grad_norm": 0.1685844510793686, "learning_rate": 1.5611467631633713e-05, "loss": 0.5509, "step": 4244 }, { "epoch": 0.9469105509703324, "grad_norm": 0.16026711463928223, "learning_rate": 1.5609519504481306e-05, "loss": 0.4926, "step": 4245 }, { "epoch": 0.9471336158822218, "grad_norm": 0.19063352048397064, "learning_rate": 1.560757106663843e-05, "loss": 0.4975, "step": 4246 }, { "epoch": 0.9473566807941111, "grad_norm": 0.1648816168308258, "learning_rate": 1.560562231821299e-05, "loss": 0.5142, "step": 4247 }, { "epoch": 0.9475797457060005, "grad_norm": 0.1545959860086441, "learning_rate": 1.5603673259312927e-05, "loss": 0.4862, "step": 4248 }, { "epoch": 0.9478028106178898, "grad_norm": 0.15768404304981232, "learning_rate": 1.5601723890046188e-05, "loss": 0.4708, "step": 4249 }, { "epoch": 0.9480258755297791, "grad_norm": 0.1632533222436905, "learning_rate": 1.5599774210520747e-05, "loss": 0.5113, "step": 4250 }, { "epoch": 0.9482489404416685, "grad_norm": 0.1614227592945099, "learning_rate": 1.5597824220844583e-05, "loss": 0.5081, "step": 4251 }, { "epoch": 0.9484720053535579, "grad_norm": 0.15559621155261993, "learning_rate": 1.55958739211257e-05, "loss": 0.5012, "step": 4252 }, { "epoch": 0.9486950702654472, "grad_norm": 0.16258159279823303, "learning_rate": 1.5593923311472127e-05, "loss": 0.5018, "step": 4253 }, { "epoch": 0.9489181351773366, "grad_norm": 0.15811897814273834, "learning_rate": 1.559197239199189e-05, "loss": 0.4694, "step": 4254 }, { "epoch": 0.949141200089226, "grad_norm": 0.15983614325523376, "learning_rate": 1.5590021162793047e-05, "loss": 0.4758, "step": 4255 }, { "epoch": 0.9493642650011154, "grad_norm": 0.16571156680583954, "learning_rate": 1.558806962398367e-05, "loss": 0.4952, "step": 4256 }, { "epoch": 0.9495873299130047, "grad_norm": 0.17233605682849884, "learning_rate": 1.5586117775671844e-05, "loss": 0.5252, "step": 4257 }, { "epoch": 0.949810394824894, "grad_norm": 0.17330588400363922, "learning_rate": 1.558416561796568e-05, "loss": 0.5099, "step": 4258 }, { "epoch": 0.9500334597367834, "grad_norm": 0.15399448573589325, "learning_rate": 1.5582213150973296e-05, "loss": 0.4697, "step": 4259 }, { "epoch": 0.9502565246486727, "grad_norm": 0.17017517983913422, "learning_rate": 1.5580260374802837e-05, "loss": 0.4981, "step": 4260 }, { "epoch": 0.9504795895605621, "grad_norm": 0.1677882969379425, "learning_rate": 1.5578307289562457e-05, "loss": 0.5042, "step": 4261 }, { "epoch": 0.9507026544724515, "grad_norm": 0.15946677327156067, "learning_rate": 1.557635389536033e-05, "loss": 0.4697, "step": 4262 }, { "epoch": 0.9509257193843409, "grad_norm": 1.8394982814788818, "learning_rate": 1.557440019230465e-05, "loss": 0.5449, "step": 4263 }, { "epoch": 0.9511487842962302, "grad_norm": 0.16612844169139862, "learning_rate": 1.5572446180503618e-05, "loss": 0.4826, "step": 4264 }, { "epoch": 0.9513718492081196, "grad_norm": 0.15885433554649353, "learning_rate": 1.557049186006547e-05, "loss": 0.4837, "step": 4265 }, { "epoch": 0.951594914120009, "grad_norm": 0.16099439561367035, "learning_rate": 1.5568537231098438e-05, "loss": 0.4863, "step": 4266 }, { "epoch": 0.9518179790318982, "grad_norm": 0.15327222645282745, "learning_rate": 1.5566582293710787e-05, "loss": 0.4865, "step": 4267 }, { "epoch": 0.9520410439437876, "grad_norm": 0.1621992439031601, "learning_rate": 1.5564627048010797e-05, "loss": 0.5082, "step": 4268 }, { "epoch": 0.952264108855677, "grad_norm": 0.16586080193519592, "learning_rate": 1.5562671494106756e-05, "loss": 0.5066, "step": 4269 }, { "epoch": 0.9524871737675663, "grad_norm": 0.16953197121620178, "learning_rate": 1.5560715632106976e-05, "loss": 0.5127, "step": 4270 }, { "epoch": 0.9527102386794557, "grad_norm": 0.1806926429271698, "learning_rate": 1.555875946211979e-05, "loss": 0.5091, "step": 4271 }, { "epoch": 0.9529333035913451, "grad_norm": 0.16707123816013336, "learning_rate": 1.5556802984253534e-05, "loss": 0.5048, "step": 4272 }, { "epoch": 0.9531563685032345, "grad_norm": 0.29071560502052307, "learning_rate": 1.5554846198616576e-05, "loss": 0.4998, "step": 4273 }, { "epoch": 0.9533794334151238, "grad_norm": 0.19042329490184784, "learning_rate": 1.5552889105317296e-05, "loss": 0.4883, "step": 4274 }, { "epoch": 0.9536024983270132, "grad_norm": 0.1610180288553238, "learning_rate": 1.555093170446409e-05, "loss": 0.4816, "step": 4275 }, { "epoch": 0.9538255632389026, "grad_norm": 0.16079892218112946, "learning_rate": 1.5548973996165365e-05, "loss": 0.5118, "step": 4276 }, { "epoch": 0.9540486281507918, "grad_norm": 0.1768450289964676, "learning_rate": 1.5547015980529558e-05, "loss": 0.5249, "step": 4277 }, { "epoch": 0.9542716930626812, "grad_norm": 0.1556175947189331, "learning_rate": 1.5545057657665115e-05, "loss": 0.5081, "step": 4278 }, { "epoch": 0.9544947579745706, "grad_norm": 0.16020707786083221, "learning_rate": 1.5543099027680496e-05, "loss": 0.4846, "step": 4279 }, { "epoch": 0.95471782288646, "grad_norm": 0.1518724262714386, "learning_rate": 1.554114009068419e-05, "loss": 0.4866, "step": 4280 }, { "epoch": 0.9549408877983493, "grad_norm": 0.1620427668094635, "learning_rate": 1.5539180846784686e-05, "loss": 0.5054, "step": 4281 }, { "epoch": 0.9551639527102387, "grad_norm": 0.1567380130290985, "learning_rate": 1.5537221296090506e-05, "loss": 0.4857, "step": 4282 }, { "epoch": 0.9553870176221281, "grad_norm": 0.1572207808494568, "learning_rate": 1.553526143871018e-05, "loss": 0.4803, "step": 4283 }, { "epoch": 0.9556100825340174, "grad_norm": 0.1630953699350357, "learning_rate": 1.553330127475226e-05, "loss": 0.5076, "step": 4284 }, { "epoch": 0.9558331474459068, "grad_norm": 0.16126613318920135, "learning_rate": 1.5531340804325303e-05, "loss": 0.4566, "step": 4285 }, { "epoch": 0.9560562123577961, "grad_norm": 0.16429363191127777, "learning_rate": 1.5529380027537904e-05, "loss": 0.4905, "step": 4286 }, { "epoch": 0.9562792772696854, "grad_norm": 0.16202637553215027, "learning_rate": 1.5527418944498656e-05, "loss": 0.4962, "step": 4287 }, { "epoch": 0.9565023421815748, "grad_norm": 0.16903194785118103, "learning_rate": 1.5525457555316177e-05, "loss": 0.4917, "step": 4288 }, { "epoch": 0.9567254070934642, "grad_norm": 0.16751469671726227, "learning_rate": 1.5523495860099102e-05, "loss": 0.4762, "step": 4289 }, { "epoch": 0.9569484720053536, "grad_norm": 0.16918997466564178, "learning_rate": 1.5521533858956085e-05, "loss": 0.5005, "step": 4290 }, { "epoch": 0.9571715369172429, "grad_norm": 0.268764466047287, "learning_rate": 1.551957155199579e-05, "loss": 0.4734, "step": 4291 }, { "epoch": 0.9573946018291323, "grad_norm": 0.1583995223045349, "learning_rate": 1.55176089393269e-05, "loss": 0.4776, "step": 4292 }, { "epoch": 0.9576176667410217, "grad_norm": 0.16260802745819092, "learning_rate": 1.5515646021058124e-05, "loss": 0.487, "step": 4293 }, { "epoch": 0.957840731652911, "grad_norm": 0.16519910097122192, "learning_rate": 1.5513682797298172e-05, "loss": 0.4916, "step": 4294 }, { "epoch": 0.9580637965648003, "grad_norm": 0.15430989861488342, "learning_rate": 1.551171926815579e-05, "loss": 0.4645, "step": 4295 }, { "epoch": 0.9582868614766897, "grad_norm": 0.15664781630039215, "learning_rate": 1.5509755433739723e-05, "loss": 0.4824, "step": 4296 }, { "epoch": 0.9585099263885791, "grad_norm": 0.16775622963905334, "learning_rate": 1.550779129415874e-05, "loss": 0.4993, "step": 4297 }, { "epoch": 0.9587329913004684, "grad_norm": 0.163412943482399, "learning_rate": 1.550582684952163e-05, "loss": 0.5176, "step": 4298 }, { "epoch": 0.9589560562123578, "grad_norm": 0.1565362513065338, "learning_rate": 1.5503862099937198e-05, "loss": 0.4667, "step": 4299 }, { "epoch": 0.9591791211242472, "grad_norm": 0.16091248393058777, "learning_rate": 1.550189704551426e-05, "loss": 0.4823, "step": 4300 }, { "epoch": 0.9594021860361365, "grad_norm": 0.1619873195886612, "learning_rate": 1.5499931686361658e-05, "loss": 0.4753, "step": 4301 }, { "epoch": 0.9596252509480259, "grad_norm": 0.18374527990818024, "learning_rate": 1.549796602258824e-05, "loss": 0.465, "step": 4302 }, { "epoch": 0.9598483158599153, "grad_norm": 0.16035234928131104, "learning_rate": 1.549600005430288e-05, "loss": 0.4913, "step": 4303 }, { "epoch": 0.9600713807718046, "grad_norm": 0.16635017096996307, "learning_rate": 1.549403378161447e-05, "loss": 0.4869, "step": 4304 }, { "epoch": 0.9602944456836939, "grad_norm": 0.16243912279605865, "learning_rate": 1.5492067204631908e-05, "loss": 0.4787, "step": 4305 }, { "epoch": 0.9605175105955833, "grad_norm": 0.1565091758966446, "learning_rate": 1.5490100323464118e-05, "loss": 0.4805, "step": 4306 }, { "epoch": 0.9607405755074727, "grad_norm": 0.1620541661977768, "learning_rate": 1.5488133138220038e-05, "loss": 0.5016, "step": 4307 }, { "epoch": 0.960963640419362, "grad_norm": 0.16399456560611725, "learning_rate": 1.5486165649008623e-05, "loss": 0.4977, "step": 4308 }, { "epoch": 0.9611867053312514, "grad_norm": 0.1687907576560974, "learning_rate": 1.5484197855938847e-05, "loss": 0.4966, "step": 4309 }, { "epoch": 0.9614097702431408, "grad_norm": 0.16494713723659515, "learning_rate": 1.548222975911969e-05, "loss": 0.5044, "step": 4310 }, { "epoch": 0.9616328351550301, "grad_norm": 0.16072864830493927, "learning_rate": 1.5480261358660172e-05, "loss": 0.4969, "step": 4311 }, { "epoch": 0.9618559000669195, "grad_norm": 0.1517607718706131, "learning_rate": 1.5478292654669304e-05, "loss": 0.4789, "step": 4312 }, { "epoch": 0.9620789649788088, "grad_norm": 0.15903200209140778, "learning_rate": 1.547632364725613e-05, "loss": 0.4877, "step": 4313 }, { "epoch": 0.9623020298906982, "grad_norm": 0.2090669870376587, "learning_rate": 1.5474354336529706e-05, "loss": 0.4699, "step": 4314 }, { "epoch": 0.9625250948025875, "grad_norm": 0.17890144884586334, "learning_rate": 1.5472384722599102e-05, "loss": 0.4921, "step": 4315 }, { "epoch": 0.9627481597144769, "grad_norm": 0.1629069298505783, "learning_rate": 1.547041480557341e-05, "loss": 0.5049, "step": 4316 }, { "epoch": 0.9629712246263663, "grad_norm": 0.1475018411874771, "learning_rate": 1.5468444585561736e-05, "loss": 0.4795, "step": 4317 }, { "epoch": 0.9631942895382556, "grad_norm": 0.15612593293190002, "learning_rate": 1.54664740626732e-05, "loss": 0.4402, "step": 4318 }, { "epoch": 0.963417354450145, "grad_norm": 0.15412123501300812, "learning_rate": 1.546450323701695e-05, "loss": 0.4493, "step": 4319 }, { "epoch": 0.9636404193620344, "grad_norm": 0.16448107361793518, "learning_rate": 1.5462532108702134e-05, "loss": 0.4979, "step": 4320 }, { "epoch": 0.9638634842739238, "grad_norm": 0.16529002785682678, "learning_rate": 1.546056067783793e-05, "loss": 0.5127, "step": 4321 }, { "epoch": 0.964086549185813, "grad_norm": 0.15807853639125824, "learning_rate": 1.545858894453353e-05, "loss": 0.4638, "step": 4322 }, { "epoch": 0.9643096140977024, "grad_norm": 0.16824275255203247, "learning_rate": 1.5456616908898134e-05, "loss": 0.4877, "step": 4323 }, { "epoch": 0.9645326790095918, "grad_norm": 0.15887348353862762, "learning_rate": 1.5454644571040973e-05, "loss": 0.4688, "step": 4324 }, { "epoch": 0.9647557439214811, "grad_norm": 0.16340267658233643, "learning_rate": 1.545267193107128e-05, "loss": 0.4779, "step": 4325 }, { "epoch": 0.9649788088333705, "grad_norm": 0.16982871294021606, "learning_rate": 1.545069898909832e-05, "loss": 0.5058, "step": 4326 }, { "epoch": 0.9652018737452599, "grad_norm": 0.15784378349781036, "learning_rate": 1.544872574523137e-05, "loss": 0.4563, "step": 4327 }, { "epoch": 0.9654249386571492, "grad_norm": 0.15926995873451233, "learning_rate": 1.5446752199579703e-05, "loss": 0.4893, "step": 4328 }, { "epoch": 0.9656480035690386, "grad_norm": 0.16018277406692505, "learning_rate": 1.544477835225265e-05, "loss": 0.4876, "step": 4329 }, { "epoch": 0.965871068480928, "grad_norm": 0.16839507222175598, "learning_rate": 1.544280420335951e-05, "loss": 0.5184, "step": 4330 }, { "epoch": 0.9660941333928174, "grad_norm": 0.1582273244857788, "learning_rate": 1.5440829753009646e-05, "loss": 0.4895, "step": 4331 }, { "epoch": 0.9663171983047066, "grad_norm": 0.15812306106090546, "learning_rate": 1.5438855001312402e-05, "loss": 0.5244, "step": 4332 }, { "epoch": 0.966540263216596, "grad_norm": 0.15881675481796265, "learning_rate": 1.5436879948377157e-05, "loss": 0.4541, "step": 4333 }, { "epoch": 0.9667633281284854, "grad_norm": 0.1649298518896103, "learning_rate": 1.5434904594313303e-05, "loss": 0.4898, "step": 4334 }, { "epoch": 0.9669863930403747, "grad_norm": 0.1670350432395935, "learning_rate": 1.5432928939230243e-05, "loss": 0.4961, "step": 4335 }, { "epoch": 0.9672094579522641, "grad_norm": 0.17490597069263458, "learning_rate": 1.5430952983237404e-05, "loss": 0.4996, "step": 4336 }, { "epoch": 0.9674325228641535, "grad_norm": 0.18037396669387817, "learning_rate": 1.542897672644423e-05, "loss": 0.4856, "step": 4337 }, { "epoch": 0.9676555877760429, "grad_norm": 0.16085660457611084, "learning_rate": 1.5427000168960172e-05, "loss": 0.4741, "step": 4338 }, { "epoch": 0.9678786526879322, "grad_norm": 0.16227804124355316, "learning_rate": 1.5425023310894707e-05, "loss": 0.4858, "step": 4339 }, { "epoch": 0.9681017175998216, "grad_norm": 0.21843907237052917, "learning_rate": 1.5423046152357328e-05, "loss": 0.4705, "step": 4340 }, { "epoch": 0.968324782511711, "grad_norm": 0.16902711987495422, "learning_rate": 1.542106869345754e-05, "loss": 0.4673, "step": 4341 }, { "epoch": 0.9685478474236002, "grad_norm": 0.15176647901535034, "learning_rate": 1.5419090934304865e-05, "loss": 0.477, "step": 4342 }, { "epoch": 0.9687709123354896, "grad_norm": 0.15946295857429504, "learning_rate": 1.5417112875008854e-05, "loss": 0.4768, "step": 4343 }, { "epoch": 0.968993977247379, "grad_norm": 0.1705644428730011, "learning_rate": 1.5415134515679053e-05, "loss": 0.4806, "step": 4344 }, { "epoch": 0.9692170421592683, "grad_norm": 0.1598884016275406, "learning_rate": 1.541315585642504e-05, "loss": 0.5018, "step": 4345 }, { "epoch": 0.9694401070711577, "grad_norm": 0.15806621313095093, "learning_rate": 1.54111768973564e-05, "loss": 0.4854, "step": 4346 }, { "epoch": 0.9696631719830471, "grad_norm": 0.15730160474777222, "learning_rate": 1.5409197638582753e-05, "loss": 0.499, "step": 4347 }, { "epoch": 0.9698862368949365, "grad_norm": 0.16367729008197784, "learning_rate": 1.540721808021371e-05, "loss": 0.4784, "step": 4348 }, { "epoch": 0.9701093018068258, "grad_norm": 0.16696509718894958, "learning_rate": 1.5405238222358925e-05, "loss": 0.4887, "step": 4349 }, { "epoch": 0.9703323667187151, "grad_norm": 0.1621687263250351, "learning_rate": 1.5403258065128042e-05, "loss": 0.5113, "step": 4350 }, { "epoch": 0.9705554316306045, "grad_norm": 0.1656588315963745, "learning_rate": 1.5401277608630742e-05, "loss": 0.5021, "step": 4351 }, { "epoch": 0.9707784965424938, "grad_norm": 0.17137381434440613, "learning_rate": 1.539929685297671e-05, "loss": 0.4965, "step": 4352 }, { "epoch": 0.9710015614543832, "grad_norm": 0.16810309886932373, "learning_rate": 1.5397315798275654e-05, "loss": 0.4961, "step": 4353 }, { "epoch": 0.9712246263662726, "grad_norm": 0.17059317231178284, "learning_rate": 1.5395334444637306e-05, "loss": 0.4708, "step": 4354 }, { "epoch": 0.971447691278162, "grad_norm": 0.16224807500839233, "learning_rate": 1.539335279217139e-05, "loss": 0.4593, "step": 4355 }, { "epoch": 0.9716707561900513, "grad_norm": 0.17209084331989288, "learning_rate": 1.5391370840987674e-05, "loss": 0.4959, "step": 4356 }, { "epoch": 0.9718938211019407, "grad_norm": 0.16699501872062683, "learning_rate": 1.5389388591195928e-05, "loss": 0.4897, "step": 4357 }, { "epoch": 0.9721168860138301, "grad_norm": 0.15935075283050537, "learning_rate": 1.538740604290594e-05, "loss": 0.4833, "step": 4358 }, { "epoch": 0.9723399509257193, "grad_norm": 0.16760936379432678, "learning_rate": 1.538542319622752e-05, "loss": 0.4707, "step": 4359 }, { "epoch": 0.9725630158376087, "grad_norm": 0.19774286448955536, "learning_rate": 1.5383440051270486e-05, "loss": 0.4799, "step": 4360 }, { "epoch": 0.9727860807494981, "grad_norm": 0.1727793663740158, "learning_rate": 1.5381456608144677e-05, "loss": 0.4761, "step": 4361 }, { "epoch": 0.9730091456613874, "grad_norm": 0.1611672341823578, "learning_rate": 1.5379472866959954e-05, "loss": 0.4984, "step": 4362 }, { "epoch": 0.9732322105732768, "grad_norm": 0.15909035503864288, "learning_rate": 1.537748882782618e-05, "loss": 0.487, "step": 4363 }, { "epoch": 0.9734552754851662, "grad_norm": 0.17754100263118744, "learning_rate": 1.5375504490853255e-05, "loss": 0.4782, "step": 4364 }, { "epoch": 0.9736783403970556, "grad_norm": 0.19527114927768707, "learning_rate": 1.5373519856151077e-05, "loss": 0.4783, "step": 4365 }, { "epoch": 0.9739014053089449, "grad_norm": 0.1601899415254593, "learning_rate": 1.5371534923829562e-05, "loss": 0.5042, "step": 4366 }, { "epoch": 0.9741244702208343, "grad_norm": 0.16773121058940887, "learning_rate": 1.536954969399866e-05, "loss": 0.5025, "step": 4367 }, { "epoch": 0.9743475351327237, "grad_norm": 0.1618761271238327, "learning_rate": 1.5367564166768322e-05, "loss": 0.5125, "step": 4368 }, { "epoch": 0.9745706000446129, "grad_norm": 0.1555018573999405, "learning_rate": 1.5365578342248515e-05, "loss": 0.4754, "step": 4369 }, { "epoch": 0.9747936649565023, "grad_norm": 0.16692039370536804, "learning_rate": 1.5363592220549227e-05, "loss": 0.5025, "step": 4370 }, { "epoch": 0.9750167298683917, "grad_norm": 0.16272681951522827, "learning_rate": 1.5361605801780465e-05, "loss": 0.513, "step": 4371 }, { "epoch": 0.9752397947802811, "grad_norm": 0.17153267562389374, "learning_rate": 1.535961908605225e-05, "loss": 0.4716, "step": 4372 }, { "epoch": 0.9754628596921704, "grad_norm": 0.162004753947258, "learning_rate": 1.5357632073474614e-05, "loss": 0.4752, "step": 4373 }, { "epoch": 0.9756859246040598, "grad_norm": 0.17653700709342957, "learning_rate": 1.535564476415761e-05, "loss": 0.4875, "step": 4374 }, { "epoch": 0.9759089895159492, "grad_norm": 0.16345418989658356, "learning_rate": 1.535365715821132e-05, "loss": 0.5036, "step": 4375 }, { "epoch": 0.9761320544278385, "grad_norm": 0.16799689829349518, "learning_rate": 1.535166925574581e-05, "loss": 0.5035, "step": 4376 }, { "epoch": 0.9763551193397279, "grad_norm": 0.204716295003891, "learning_rate": 1.53496810568712e-05, "loss": 0.501, "step": 4377 }, { "epoch": 0.9765781842516172, "grad_norm": 0.15521782636642456, "learning_rate": 1.53476925616976e-05, "loss": 0.4864, "step": 4378 }, { "epoch": 0.9768012491635066, "grad_norm": 0.16439463198184967, "learning_rate": 1.5345703770335147e-05, "loss": 0.491, "step": 4379 }, { "epoch": 0.9770243140753959, "grad_norm": 0.16255317628383636, "learning_rate": 1.5343714682893997e-05, "loss": 0.5108, "step": 4380 }, { "epoch": 0.9772473789872853, "grad_norm": 0.15555396676063538, "learning_rate": 1.534172529948431e-05, "loss": 0.4506, "step": 4381 }, { "epoch": 0.9774704438991747, "grad_norm": 0.16245706379413605, "learning_rate": 1.5339735620216275e-05, "loss": 0.4892, "step": 4382 }, { "epoch": 0.977693508811064, "grad_norm": 0.16190017759799957, "learning_rate": 1.5337745645200097e-05, "loss": 0.4716, "step": 4383 }, { "epoch": 0.9779165737229534, "grad_norm": 0.1614699810743332, "learning_rate": 1.5335755374545985e-05, "loss": 0.5041, "step": 4384 }, { "epoch": 0.9781396386348428, "grad_norm": 0.1543981432914734, "learning_rate": 1.533376480836418e-05, "loss": 0.4837, "step": 4385 }, { "epoch": 0.978362703546732, "grad_norm": 0.15510593354701996, "learning_rate": 1.5331773946764928e-05, "loss": 0.5073, "step": 4386 }, { "epoch": 0.9785857684586214, "grad_norm": 0.16036750376224518, "learning_rate": 1.5329782789858495e-05, "loss": 0.511, "step": 4387 }, { "epoch": 0.9788088333705108, "grad_norm": 0.1586884707212448, "learning_rate": 1.532779133775517e-05, "loss": 0.4783, "step": 4388 }, { "epoch": 0.9790318982824002, "grad_norm": 0.1595693677663803, "learning_rate": 1.5325799590565247e-05, "loss": 0.4847, "step": 4389 }, { "epoch": 0.9792549631942895, "grad_norm": 0.1676463484764099, "learning_rate": 1.532380754839904e-05, "loss": 0.4857, "step": 4390 }, { "epoch": 0.9794780281061789, "grad_norm": 0.1569330245256424, "learning_rate": 1.532181521136688e-05, "loss": 0.5038, "step": 4391 }, { "epoch": 0.9797010930180683, "grad_norm": 0.15425747632980347, "learning_rate": 1.5319822579579125e-05, "loss": 0.4687, "step": 4392 }, { "epoch": 0.9799241579299576, "grad_norm": 0.1658424586057663, "learning_rate": 1.5317829653146127e-05, "loss": 0.4727, "step": 4393 }, { "epoch": 0.980147222841847, "grad_norm": 0.16950277984142303, "learning_rate": 1.5315836432178275e-05, "loss": 0.5056, "step": 4394 }, { "epoch": 0.9803702877537364, "grad_norm": 0.1796022653579712, "learning_rate": 1.5313842916785965e-05, "loss": 0.5062, "step": 4395 }, { "epoch": 0.9805933526656258, "grad_norm": 0.16499702632427216, "learning_rate": 1.5311849107079603e-05, "loss": 0.5142, "step": 4396 }, { "epoch": 0.980816417577515, "grad_norm": 0.1709393560886383, "learning_rate": 1.5309855003169632e-05, "loss": 0.5001, "step": 4397 }, { "epoch": 0.9810394824894044, "grad_norm": 0.16077296435832977, "learning_rate": 1.5307860605166487e-05, "loss": 0.4908, "step": 4398 }, { "epoch": 0.9812625474012938, "grad_norm": 0.17582206428050995, "learning_rate": 1.5305865913180633e-05, "loss": 0.4844, "step": 4399 }, { "epoch": 0.9814856123131831, "grad_norm": 0.16168825328350067, "learning_rate": 1.5303870927322552e-05, "loss": 0.4856, "step": 4400 }, { "epoch": 0.9817086772250725, "grad_norm": 0.15929754078388214, "learning_rate": 1.5301875647702732e-05, "loss": 0.4878, "step": 4401 }, { "epoch": 0.9819317421369619, "grad_norm": 0.16380812227725983, "learning_rate": 1.5299880074431693e-05, "loss": 0.5009, "step": 4402 }, { "epoch": 0.9821548070488512, "grad_norm": 0.17140789330005646, "learning_rate": 1.5297884207619957e-05, "loss": 0.4604, "step": 4403 }, { "epoch": 0.9823778719607406, "grad_norm": 0.17358756065368652, "learning_rate": 1.5295888047378064e-05, "loss": 0.4905, "step": 4404 }, { "epoch": 0.98260093687263, "grad_norm": 0.16201473772525787, "learning_rate": 1.5293891593816583e-05, "loss": 0.5156, "step": 4405 }, { "epoch": 0.9828240017845193, "grad_norm": 0.16091175377368927, "learning_rate": 1.529189484704608e-05, "loss": 0.4883, "step": 4406 }, { "epoch": 0.9830470666964086, "grad_norm": 0.16278314590454102, "learning_rate": 1.528989780717716e-05, "loss": 0.4727, "step": 4407 }, { "epoch": 0.983270131608298, "grad_norm": 0.17446744441986084, "learning_rate": 1.5287900474320422e-05, "loss": 0.4824, "step": 4408 }, { "epoch": 0.9834931965201874, "grad_norm": 0.3287087678909302, "learning_rate": 1.5285902848586495e-05, "loss": 0.49, "step": 4409 }, { "epoch": 0.9837162614320767, "grad_norm": 0.16612185537815094, "learning_rate": 1.5283904930086017e-05, "loss": 0.4911, "step": 4410 }, { "epoch": 0.9839393263439661, "grad_norm": 0.16220030188560486, "learning_rate": 1.528190671892965e-05, "loss": 0.4537, "step": 4411 }, { "epoch": 0.9841623912558555, "grad_norm": 0.1631203591823578, "learning_rate": 1.5279908215228058e-05, "loss": 0.4725, "step": 4412 }, { "epoch": 0.9843854561677449, "grad_norm": 0.15897974371910095, "learning_rate": 1.5277909419091942e-05, "loss": 0.4794, "step": 4413 }, { "epoch": 0.9846085210796341, "grad_norm": 0.18077795207500458, "learning_rate": 1.5275910330632e-05, "loss": 0.4764, "step": 4414 }, { "epoch": 0.9848315859915235, "grad_norm": 0.1682848483324051, "learning_rate": 1.5273910949958963e-05, "loss": 0.5024, "step": 4415 }, { "epoch": 0.9850546509034129, "grad_norm": 0.1584632247686386, "learning_rate": 1.527191127718356e-05, "loss": 0.5007, "step": 4416 }, { "epoch": 0.9852777158153022, "grad_norm": 0.1735336184501648, "learning_rate": 1.5269911312416547e-05, "loss": 0.4887, "step": 4417 }, { "epoch": 0.9855007807271916, "grad_norm": 0.16184300184249878, "learning_rate": 1.5267911055768697e-05, "loss": 0.5064, "step": 4418 }, { "epoch": 0.985723845639081, "grad_norm": 0.15283328294754028, "learning_rate": 1.5265910507350797e-05, "loss": 0.5012, "step": 4419 }, { "epoch": 0.9859469105509703, "grad_norm": 0.1493072211742401, "learning_rate": 1.526390966727365e-05, "loss": 0.4763, "step": 4420 }, { "epoch": 0.9861699754628597, "grad_norm": 0.15723635256290436, "learning_rate": 1.526190853564807e-05, "loss": 0.4893, "step": 4421 }, { "epoch": 0.9863930403747491, "grad_norm": 0.17012642323970795, "learning_rate": 1.52599071125849e-05, "loss": 0.4905, "step": 4422 }, { "epoch": 0.9866161052866385, "grad_norm": 0.15649664402008057, "learning_rate": 1.5257905398194988e-05, "loss": 0.4841, "step": 4423 }, { "epoch": 0.9868391701985277, "grad_norm": 0.1696402132511139, "learning_rate": 1.5255903392589204e-05, "loss": 0.4792, "step": 4424 }, { "epoch": 0.9870622351104171, "grad_norm": 0.17853881418704987, "learning_rate": 1.5253901095878423e-05, "loss": 0.5112, "step": 4425 }, { "epoch": 0.9872853000223065, "grad_norm": 0.1598142683506012, "learning_rate": 1.5251898508173558e-05, "loss": 0.4907, "step": 4426 }, { "epoch": 0.9875083649341958, "grad_norm": 0.1565473973751068, "learning_rate": 1.5249895629585511e-05, "loss": 0.4922, "step": 4427 }, { "epoch": 0.9877314298460852, "grad_norm": 0.16049052774906158, "learning_rate": 1.5247892460225226e-05, "loss": 0.4947, "step": 4428 }, { "epoch": 0.9879544947579746, "grad_norm": 0.16537557542324066, "learning_rate": 1.5245889000203644e-05, "loss": 0.4547, "step": 4429 }, { "epoch": 0.988177559669864, "grad_norm": 0.16105584800243378, "learning_rate": 1.5243885249631732e-05, "loss": 0.4697, "step": 4430 }, { "epoch": 0.9884006245817533, "grad_norm": 0.5197194814682007, "learning_rate": 1.5241881208620468e-05, "loss": 0.4723, "step": 4431 }, { "epoch": 0.9886236894936427, "grad_norm": 0.15987588465213776, "learning_rate": 1.5239876877280852e-05, "loss": 0.5388, "step": 4432 }, { "epoch": 0.988846754405532, "grad_norm": 0.16223685443401337, "learning_rate": 1.5237872255723894e-05, "loss": 0.5185, "step": 4433 }, { "epoch": 0.9890698193174213, "grad_norm": 0.16801123321056366, "learning_rate": 1.5235867344060622e-05, "loss": 0.4825, "step": 4434 }, { "epoch": 0.9892928842293107, "grad_norm": 0.16588987410068512, "learning_rate": 1.523386214240208e-05, "loss": 0.4933, "step": 4435 }, { "epoch": 0.9895159491412001, "grad_norm": 0.1642870157957077, "learning_rate": 1.5231856650859334e-05, "loss": 0.4572, "step": 4436 }, { "epoch": 0.9897390140530894, "grad_norm": 0.1569276601076126, "learning_rate": 1.5229850869543454e-05, "loss": 0.4844, "step": 4437 }, { "epoch": 0.9899620789649788, "grad_norm": 0.16232363879680634, "learning_rate": 1.5227844798565538e-05, "loss": 0.5041, "step": 4438 }, { "epoch": 0.9901851438768682, "grad_norm": 0.18178559839725494, "learning_rate": 1.5225838438036693e-05, "loss": 0.5203, "step": 4439 }, { "epoch": 0.9904082087887576, "grad_norm": 0.1483062505722046, "learning_rate": 1.5223831788068039e-05, "loss": 0.4601, "step": 4440 }, { "epoch": 0.9906312737006469, "grad_norm": 0.16904570162296295, "learning_rate": 1.5221824848770728e-05, "loss": 0.4692, "step": 4441 }, { "epoch": 0.9908543386125362, "grad_norm": 0.16170957684516907, "learning_rate": 1.5219817620255906e-05, "loss": 0.476, "step": 4442 }, { "epoch": 0.9910774035244256, "grad_norm": 0.15581966936588287, "learning_rate": 1.521781010263475e-05, "loss": 0.4685, "step": 4443 }, { "epoch": 0.9913004684363149, "grad_norm": 0.17382118105888367, "learning_rate": 1.521580229601845e-05, "loss": 0.511, "step": 4444 }, { "epoch": 0.9915235333482043, "grad_norm": 0.16696421802043915, "learning_rate": 1.521379420051821e-05, "loss": 0.5072, "step": 4445 }, { "epoch": 0.9917465982600937, "grad_norm": 0.16701167821884155, "learning_rate": 1.521178581624525e-05, "loss": 0.521, "step": 4446 }, { "epoch": 0.9919696631719831, "grad_norm": 0.16650201380252838, "learning_rate": 1.520977714331081e-05, "loss": 0.4945, "step": 4447 }, { "epoch": 0.9921927280838724, "grad_norm": 0.1575930416584015, "learning_rate": 1.5207768181826138e-05, "loss": 0.4668, "step": 4448 }, { "epoch": 0.9924157929957618, "grad_norm": 0.16714414954185486, "learning_rate": 1.5205758931902507e-05, "loss": 0.4739, "step": 4449 }, { "epoch": 0.9926388579076512, "grad_norm": 0.1591663658618927, "learning_rate": 1.5203749393651204e-05, "loss": 0.5024, "step": 4450 }, { "epoch": 0.9928619228195404, "grad_norm": 0.16330404579639435, "learning_rate": 1.5201739567183525e-05, "loss": 0.5104, "step": 4451 }, { "epoch": 0.9930849877314298, "grad_norm": 0.16072112321853638, "learning_rate": 1.519972945261079e-05, "loss": 0.4722, "step": 4452 }, { "epoch": 0.9933080526433192, "grad_norm": 0.1594439148902893, "learning_rate": 1.5197719050044328e-05, "loss": 0.4951, "step": 4453 }, { "epoch": 0.9935311175552086, "grad_norm": 0.17731733620166779, "learning_rate": 1.519570835959549e-05, "loss": 0.4799, "step": 4454 }, { "epoch": 0.9937541824670979, "grad_norm": 0.1658770889043808, "learning_rate": 1.5193697381375641e-05, "loss": 0.471, "step": 4455 }, { "epoch": 0.9939772473789873, "grad_norm": 0.168108731508255, "learning_rate": 1.5191686115496163e-05, "loss": 0.4856, "step": 4456 }, { "epoch": 0.9942003122908767, "grad_norm": 0.1631343513727188, "learning_rate": 1.5189674562068448e-05, "loss": 0.4795, "step": 4457 }, { "epoch": 0.994423377202766, "grad_norm": 0.17591005563735962, "learning_rate": 1.5187662721203916e-05, "loss": 0.4431, "step": 4458 }, { "epoch": 0.9946464421146554, "grad_norm": 0.16236771643161774, "learning_rate": 1.5185650593013984e-05, "loss": 0.5097, "step": 4459 }, { "epoch": 0.9948695070265448, "grad_norm": 0.15711717307567596, "learning_rate": 1.5183638177610109e-05, "loss": 0.4896, "step": 4460 }, { "epoch": 0.995092571938434, "grad_norm": 0.16357070207595825, "learning_rate": 1.5181625475103744e-05, "loss": 0.4878, "step": 4461 }, { "epoch": 0.9953156368503234, "grad_norm": 0.1608823835849762, "learning_rate": 1.5179612485606366e-05, "loss": 0.4924, "step": 4462 }, { "epoch": 0.9955387017622128, "grad_norm": 0.1610344648361206, "learning_rate": 1.5177599209229468e-05, "loss": 0.4564, "step": 4463 }, { "epoch": 0.9957617666741022, "grad_norm": 0.1570533961057663, "learning_rate": 1.5175585646084557e-05, "loss": 0.4665, "step": 4464 }, { "epoch": 0.9959848315859915, "grad_norm": 0.16878454387187958, "learning_rate": 1.5173571796283155e-05, "loss": 0.4832, "step": 4465 }, { "epoch": 0.9962078964978809, "grad_norm": 0.15934909880161285, "learning_rate": 1.5171557659936806e-05, "loss": 0.4629, "step": 4466 }, { "epoch": 0.9964309614097703, "grad_norm": 0.17097942531108856, "learning_rate": 1.5169543237157062e-05, "loss": 0.5009, "step": 4467 }, { "epoch": 0.9966540263216596, "grad_norm": 0.1563706398010254, "learning_rate": 1.5167528528055498e-05, "loss": 0.4864, "step": 4468 }, { "epoch": 0.996877091233549, "grad_norm": 0.16761906445026398, "learning_rate": 1.5165513532743696e-05, "loss": 0.4688, "step": 4469 }, { "epoch": 0.9971001561454383, "grad_norm": 0.16788896918296814, "learning_rate": 1.5163498251333267e-05, "loss": 0.4975, "step": 4470 }, { "epoch": 0.9973232210573277, "grad_norm": 0.16318921744823456, "learning_rate": 1.516148268393582e-05, "loss": 0.483, "step": 4471 }, { "epoch": 0.997546285969217, "grad_norm": 0.1533585637807846, "learning_rate": 1.5159466830662997e-05, "loss": 0.4888, "step": 4472 }, { "epoch": 0.9977693508811064, "grad_norm": 0.16081759333610535, "learning_rate": 1.515745069162645e-05, "loss": 0.4997, "step": 4473 }, { "epoch": 0.9979924157929958, "grad_norm": 0.17346571385860443, "learning_rate": 1.5155434266937836e-05, "loss": 0.4918, "step": 4474 }, { "epoch": 0.9982154807048851, "grad_norm": 0.16051939129829407, "learning_rate": 1.515341755670885e-05, "loss": 0.4911, "step": 4475 }, { "epoch": 0.9984385456167745, "grad_norm": 0.15489095449447632, "learning_rate": 1.5151400561051177e-05, "loss": 0.4573, "step": 4476 }, { "epoch": 0.9986616105286639, "grad_norm": 0.17051677405834198, "learning_rate": 1.5149383280076544e-05, "loss": 0.47, "step": 4477 }, { "epoch": 0.9988846754405531, "grad_norm": 0.16898037493228912, "learning_rate": 1.5147365713896669e-05, "loss": 0.5265, "step": 4478 }, { "epoch": 0.9991077403524425, "grad_norm": 0.1665852814912796, "learning_rate": 1.5145347862623303e-05, "loss": 0.4967, "step": 4479 }, { "epoch": 0.9993308052643319, "grad_norm": 0.16627469658851624, "learning_rate": 1.5143329726368205e-05, "loss": 0.4755, "step": 4480 }, { "epoch": 0.9995538701762213, "grad_norm": 0.15166249871253967, "learning_rate": 1.5141311305243158e-05, "loss": 0.4762, "step": 4481 }, { "epoch": 0.9997769350881106, "grad_norm": 0.15809613466262817, "learning_rate": 1.513929259935995e-05, "loss": 0.4825, "step": 4482 }, { "epoch": 1.0, "grad_norm": 0.24375846982002258, "learning_rate": 1.5137273608830387e-05, "loss": 0.4902, "step": 4483 }, { "epoch": 1.0, "eval_loss": 0.3229251801967621, "eval_runtime": 666.2343, "eval_samples_per_second": 94.726, "eval_steps_per_second": 1.481, "step": 4483 }, { "epoch": 1.0002230649118893, "grad_norm": 0.2046872079372406, "learning_rate": 1.5135254333766302e-05, "loss": 0.5093, "step": 4484 }, { "epoch": 1.0004461298237788, "grad_norm": 0.16689886152744293, "learning_rate": 1.5133234774279526e-05, "loss": 0.5118, "step": 4485 }, { "epoch": 1.000669194735668, "grad_norm": 0.16698423027992249, "learning_rate": 1.5131214930481922e-05, "loss": 0.4917, "step": 4486 }, { "epoch": 1.0008922596475573, "grad_norm": 0.17248550057411194, "learning_rate": 1.5129194802485354e-05, "loss": 0.4693, "step": 4487 }, { "epoch": 1.0011153245594469, "grad_norm": 0.16011400520801544, "learning_rate": 1.5127174390401717e-05, "loss": 0.4788, "step": 4488 }, { "epoch": 1.0013383894713361, "grad_norm": 0.16476082801818848, "learning_rate": 1.512515369434291e-05, "loss": 0.4723, "step": 4489 }, { "epoch": 1.0015614543832254, "grad_norm": 0.1672191321849823, "learning_rate": 1.5123132714420856e-05, "loss": 0.484, "step": 4490 }, { "epoch": 1.001784519295115, "grad_norm": 0.15264827013015747, "learning_rate": 1.5121111450747483e-05, "loss": 0.4577, "step": 4491 }, { "epoch": 1.0020075842070042, "grad_norm": 0.15963797271251678, "learning_rate": 1.5119089903434746e-05, "loss": 0.4866, "step": 4492 }, { "epoch": 1.0022306491188937, "grad_norm": 0.16446684300899506, "learning_rate": 1.511706807259461e-05, "loss": 0.4976, "step": 4493 }, { "epoch": 1.002453714030783, "grad_norm": 0.1584995537996292, "learning_rate": 1.5115045958339056e-05, "loss": 0.4921, "step": 4494 }, { "epoch": 1.0026767789426723, "grad_norm": 0.16608034074306488, "learning_rate": 1.5113023560780083e-05, "loss": 0.4946, "step": 4495 }, { "epoch": 1.0028998438545618, "grad_norm": 0.15869465470314026, "learning_rate": 1.5111000880029703e-05, "loss": 0.5022, "step": 4496 }, { "epoch": 1.003122908766451, "grad_norm": 0.16816754639148712, "learning_rate": 1.5108977916199941e-05, "loss": 0.4791, "step": 4497 }, { "epoch": 1.0033459736783403, "grad_norm": 0.17300903797149658, "learning_rate": 1.5106954669402849e-05, "loss": 0.4817, "step": 4498 }, { "epoch": 1.0035690385902298, "grad_norm": 0.17333939671516418, "learning_rate": 1.510493113975048e-05, "loss": 0.4671, "step": 4499 }, { "epoch": 1.0037921035021191, "grad_norm": 0.2043878585100174, "learning_rate": 1.5102907327354919e-05, "loss": 0.5001, "step": 4500 }, { "epoch": 1.0040151684140084, "grad_norm": 0.15702618658542633, "learning_rate": 1.5100883232328247e-05, "loss": 0.5172, "step": 4501 }, { "epoch": 1.004238233325898, "grad_norm": 0.1576346904039383, "learning_rate": 1.5098858854782576e-05, "loss": 0.4813, "step": 4502 }, { "epoch": 1.0044612982377872, "grad_norm": 0.171005517244339, "learning_rate": 1.509683419483003e-05, "loss": 0.5054, "step": 4503 }, { "epoch": 1.0046843631496765, "grad_norm": 0.16706731915473938, "learning_rate": 1.5094809252582744e-05, "loss": 0.4791, "step": 4504 }, { "epoch": 1.004907428061566, "grad_norm": 0.1601940393447876, "learning_rate": 1.5092784028152878e-05, "loss": 0.4788, "step": 4505 }, { "epoch": 1.0051304929734552, "grad_norm": 0.1597285270690918, "learning_rate": 1.5090758521652596e-05, "loss": 0.4669, "step": 4506 }, { "epoch": 1.0053535578853445, "grad_norm": 0.16775481402873993, "learning_rate": 1.5088732733194085e-05, "loss": 0.5091, "step": 4507 }, { "epoch": 1.005576622797234, "grad_norm": 0.16889511048793793, "learning_rate": 1.5086706662889544e-05, "loss": 0.5208, "step": 4508 }, { "epoch": 1.0057996877091233, "grad_norm": 0.1600525975227356, "learning_rate": 1.5084680310851192e-05, "loss": 0.4861, "step": 4509 }, { "epoch": 1.0060227526210128, "grad_norm": 0.18192386627197266, "learning_rate": 1.508265367719126e-05, "loss": 0.486, "step": 4510 }, { "epoch": 1.006245817532902, "grad_norm": 0.1748298555612564, "learning_rate": 1.5080626762021997e-05, "loss": 0.4623, "step": 4511 }, { "epoch": 1.0064688824447914, "grad_norm": 0.1619860827922821, "learning_rate": 1.5078599565455666e-05, "loss": 0.4707, "step": 4512 }, { "epoch": 1.0066919473566809, "grad_norm": 0.16041533648967743, "learning_rate": 1.5076572087604544e-05, "loss": 0.4663, "step": 4513 }, { "epoch": 1.0069150122685702, "grad_norm": 0.1654667854309082, "learning_rate": 1.5074544328580928e-05, "loss": 0.4746, "step": 4514 }, { "epoch": 1.0071380771804594, "grad_norm": 0.1631074994802475, "learning_rate": 1.5072516288497127e-05, "loss": 0.508, "step": 4515 }, { "epoch": 1.007361142092349, "grad_norm": 0.16078722476959229, "learning_rate": 1.5070487967465466e-05, "loss": 0.5053, "step": 4516 }, { "epoch": 1.0075842070042382, "grad_norm": 0.15384818613529205, "learning_rate": 1.5068459365598286e-05, "loss": 0.4896, "step": 4517 }, { "epoch": 1.0078072719161275, "grad_norm": 0.15592217445373535, "learning_rate": 1.5066430483007949e-05, "loss": 0.4911, "step": 4518 }, { "epoch": 1.008030336828017, "grad_norm": 0.1650250107049942, "learning_rate": 1.5064401319806817e-05, "loss": 0.4914, "step": 4519 }, { "epoch": 1.0082534017399063, "grad_norm": 0.16215115785598755, "learning_rate": 1.5062371876107286e-05, "loss": 0.468, "step": 4520 }, { "epoch": 1.0084764666517956, "grad_norm": 0.1609305888414383, "learning_rate": 1.5060342152021757e-05, "loss": 0.5136, "step": 4521 }, { "epoch": 1.008699531563685, "grad_norm": 0.15757685899734497, "learning_rate": 1.505831214766265e-05, "loss": 0.4816, "step": 4522 }, { "epoch": 1.0089225964755744, "grad_norm": 0.15898853540420532, "learning_rate": 1.5056281863142394e-05, "loss": 0.4704, "step": 4523 }, { "epoch": 1.0091456613874636, "grad_norm": 0.16032522916793823, "learning_rate": 1.5054251298573447e-05, "loss": 0.4712, "step": 4524 }, { "epoch": 1.0093687262993531, "grad_norm": 0.1658545881509781, "learning_rate": 1.5052220454068267e-05, "loss": 0.4975, "step": 4525 }, { "epoch": 1.0095917912112424, "grad_norm": 0.15979249775409698, "learning_rate": 1.5050189329739344e-05, "loss": 0.4768, "step": 4526 }, { "epoch": 1.009814856123132, "grad_norm": 0.16390205919742584, "learning_rate": 1.5048157925699162e-05, "loss": 0.5083, "step": 4527 }, { "epoch": 1.0100379210350212, "grad_norm": 0.16372700035572052, "learning_rate": 1.5046126242060247e-05, "loss": 0.4919, "step": 4528 }, { "epoch": 1.0102609859469105, "grad_norm": 0.16318677365779877, "learning_rate": 1.5044094278935115e-05, "loss": 0.4736, "step": 4529 }, { "epoch": 1.0104840508588, "grad_norm": 0.1591407060623169, "learning_rate": 1.5042062036436315e-05, "loss": 0.4911, "step": 4530 }, { "epoch": 1.0107071157706893, "grad_norm": 0.16406004130840302, "learning_rate": 1.5040029514676402e-05, "loss": 0.4897, "step": 4531 }, { "epoch": 1.0109301806825786, "grad_norm": 0.15959376096725464, "learning_rate": 1.5037996713767956e-05, "loss": 0.4638, "step": 4532 }, { "epoch": 1.011153245594468, "grad_norm": 0.15884444117546082, "learning_rate": 1.5035963633823553e-05, "loss": 0.4431, "step": 4533 }, { "epoch": 1.0113763105063573, "grad_norm": 0.15193885564804077, "learning_rate": 1.5033930274955813e-05, "loss": 0.4828, "step": 4534 }, { "epoch": 1.0115993754182466, "grad_norm": 0.16269952058792114, "learning_rate": 1.503189663727735e-05, "loss": 0.4977, "step": 4535 }, { "epoch": 1.0118224403301361, "grad_norm": 0.1688687652349472, "learning_rate": 1.5029862720900794e-05, "loss": 0.4798, "step": 4536 }, { "epoch": 1.0120455052420254, "grad_norm": 0.1625138372182846, "learning_rate": 1.5027828525938809e-05, "loss": 0.4732, "step": 4537 }, { "epoch": 1.0122685701539147, "grad_norm": 0.162174254655838, "learning_rate": 1.5025794052504048e-05, "loss": 0.4912, "step": 4538 }, { "epoch": 1.0124916350658042, "grad_norm": 0.1722794622182846, "learning_rate": 1.5023759300709201e-05, "loss": 0.5365, "step": 4539 }, { "epoch": 1.0127146999776935, "grad_norm": 0.18794307112693787, "learning_rate": 1.5021724270666962e-05, "loss": 0.4874, "step": 4540 }, { "epoch": 1.0129377648895828, "grad_norm": 0.16088199615478516, "learning_rate": 1.5019688962490047e-05, "loss": 0.4658, "step": 4541 }, { "epoch": 1.0131608298014723, "grad_norm": 0.1688738763332367, "learning_rate": 1.501765337629118e-05, "loss": 0.4869, "step": 4542 }, { "epoch": 1.0133838947133615, "grad_norm": 0.2059110552072525, "learning_rate": 1.5015617512183109e-05, "loss": 0.4952, "step": 4543 }, { "epoch": 1.013606959625251, "grad_norm": 0.1552867442369461, "learning_rate": 1.5013581370278587e-05, "loss": 0.4761, "step": 4544 }, { "epoch": 1.0138300245371403, "grad_norm": 0.15534162521362305, "learning_rate": 1.5011544950690397e-05, "loss": 0.4751, "step": 4545 }, { "epoch": 1.0140530894490296, "grad_norm": 0.162929505109787, "learning_rate": 1.5009508253531321e-05, "loss": 0.5241, "step": 4546 }, { "epoch": 1.0142761543609191, "grad_norm": 0.16057923436164856, "learning_rate": 1.5007471278914167e-05, "loss": 0.4944, "step": 4547 }, { "epoch": 1.0144992192728084, "grad_norm": 0.18289563059806824, "learning_rate": 1.5005434026951755e-05, "loss": 0.4852, "step": 4548 }, { "epoch": 1.0147222841846977, "grad_norm": 0.1580429971218109, "learning_rate": 1.5003396497756923e-05, "loss": 0.4776, "step": 4549 }, { "epoch": 1.0149453490965872, "grad_norm": 0.1565399467945099, "learning_rate": 1.5001358691442517e-05, "loss": 0.4785, "step": 4550 }, { "epoch": 1.0151684140084765, "grad_norm": 0.16028526425361633, "learning_rate": 1.4999320608121411e-05, "loss": 0.4797, "step": 4551 }, { "epoch": 1.0153914789203657, "grad_norm": 0.16666477918624878, "learning_rate": 1.499728224790648e-05, "loss": 0.5112, "step": 4552 }, { "epoch": 1.0156145438322552, "grad_norm": 0.164764866232872, "learning_rate": 1.4995243610910625e-05, "loss": 0.4843, "step": 4553 }, { "epoch": 1.0158376087441445, "grad_norm": 0.16802887618541718, "learning_rate": 1.4993204697246758e-05, "loss": 0.4642, "step": 4554 }, { "epoch": 1.0160606736560338, "grad_norm": 0.16424837708473206, "learning_rate": 1.4991165507027802e-05, "loss": 0.471, "step": 4555 }, { "epoch": 1.0162837385679233, "grad_norm": 0.16172267496585846, "learning_rate": 1.498912604036671e-05, "loss": 0.4747, "step": 4556 }, { "epoch": 1.0165068034798126, "grad_norm": 0.1546371579170227, "learning_rate": 1.4987086297376431e-05, "loss": 0.4624, "step": 4557 }, { "epoch": 1.0167298683917019, "grad_norm": 0.15725232660770416, "learning_rate": 1.4985046278169949e-05, "loss": 0.4799, "step": 4558 }, { "epoch": 1.0169529333035914, "grad_norm": 0.1614525318145752, "learning_rate": 1.4983005982860241e-05, "loss": 0.4652, "step": 4559 }, { "epoch": 1.0171759982154807, "grad_norm": 0.1634308397769928, "learning_rate": 1.498096541156032e-05, "loss": 0.4742, "step": 4560 }, { "epoch": 1.0173990631273702, "grad_norm": 0.16403678059577942, "learning_rate": 1.4978924564383202e-05, "loss": 0.4956, "step": 4561 }, { "epoch": 1.0176221280392594, "grad_norm": 0.16014313697814941, "learning_rate": 1.4976883441441924e-05, "loss": 0.4956, "step": 4562 }, { "epoch": 1.0178451929511487, "grad_norm": 0.16341127455234528, "learning_rate": 1.4974842042849533e-05, "loss": 0.5086, "step": 4563 }, { "epoch": 1.0180682578630382, "grad_norm": 0.16314633190631866, "learning_rate": 1.4972800368719098e-05, "loss": 0.4681, "step": 4564 }, { "epoch": 1.0182913227749275, "grad_norm": 0.16120873391628265, "learning_rate": 1.4970758419163698e-05, "loss": 0.4839, "step": 4565 }, { "epoch": 1.0185143876868168, "grad_norm": 0.15808890759944916, "learning_rate": 1.4968716194296429e-05, "loss": 0.4711, "step": 4566 }, { "epoch": 1.0187374525987063, "grad_norm": 0.1576492190361023, "learning_rate": 1.4966673694230406e-05, "loss": 0.4767, "step": 4567 }, { "epoch": 1.0189605175105956, "grad_norm": 0.17056535184383392, "learning_rate": 1.4964630919078747e-05, "loss": 0.4551, "step": 4568 }, { "epoch": 1.0191835824224849, "grad_norm": 0.15599535405635834, "learning_rate": 1.4962587868954603e-05, "loss": 0.462, "step": 4569 }, { "epoch": 1.0194066473343744, "grad_norm": 0.16496673226356506, "learning_rate": 1.4960544543971125e-05, "loss": 0.4903, "step": 4570 }, { "epoch": 1.0196297122462636, "grad_norm": 0.16336064040660858, "learning_rate": 1.4958500944241488e-05, "loss": 0.4804, "step": 4571 }, { "epoch": 1.019852777158153, "grad_norm": 0.1574566662311554, "learning_rate": 1.4956457069878875e-05, "loss": 0.4405, "step": 4572 }, { "epoch": 1.0200758420700424, "grad_norm": 0.1587173491716385, "learning_rate": 1.4954412920996498e-05, "loss": 0.4729, "step": 4573 }, { "epoch": 1.0202989069819317, "grad_norm": 0.16297802329063416, "learning_rate": 1.4952368497707566e-05, "loss": 0.475, "step": 4574 }, { "epoch": 1.020521971893821, "grad_norm": 0.16890546679496765, "learning_rate": 1.4950323800125314e-05, "loss": 0.4715, "step": 4575 }, { "epoch": 1.0207450368057105, "grad_norm": 0.16996879875659943, "learning_rate": 1.4948278828362991e-05, "loss": 0.4772, "step": 4576 }, { "epoch": 1.0209681017175998, "grad_norm": 0.16053706407546997, "learning_rate": 1.4946233582533865e-05, "loss": 0.4763, "step": 4577 }, { "epoch": 1.0211911666294893, "grad_norm": 0.15968559682369232, "learning_rate": 1.4944188062751207e-05, "loss": 0.4593, "step": 4578 }, { "epoch": 1.0214142315413786, "grad_norm": 0.15733575820922852, "learning_rate": 1.4942142269128312e-05, "loss": 0.462, "step": 4579 }, { "epoch": 1.0216372964532678, "grad_norm": 0.1631694734096527, "learning_rate": 1.4940096201778493e-05, "loss": 0.4845, "step": 4580 }, { "epoch": 1.0218603613651573, "grad_norm": 0.15469685196876526, "learning_rate": 1.4938049860815072e-05, "loss": 0.4726, "step": 4581 }, { "epoch": 1.0220834262770466, "grad_norm": 0.16130223870277405, "learning_rate": 1.4936003246351386e-05, "loss": 0.4966, "step": 4582 }, { "epoch": 1.022306491188936, "grad_norm": 0.19383056461811066, "learning_rate": 1.4933956358500794e-05, "loss": 0.5019, "step": 4583 }, { "epoch": 1.0225295561008254, "grad_norm": 0.16322484612464905, "learning_rate": 1.4931909197376664e-05, "loss": 0.4344, "step": 4584 }, { "epoch": 1.0227526210127147, "grad_norm": 0.15319214761257172, "learning_rate": 1.4929861763092378e-05, "loss": 0.475, "step": 4585 }, { "epoch": 1.022975685924604, "grad_norm": 0.16148288547992706, "learning_rate": 1.4927814055761336e-05, "loss": 0.5031, "step": 4586 }, { "epoch": 1.0231987508364935, "grad_norm": 0.17277774214744568, "learning_rate": 1.4925766075496955e-05, "loss": 0.5255, "step": 4587 }, { "epoch": 1.0234218157483828, "grad_norm": 0.16420382261276245, "learning_rate": 1.4923717822412666e-05, "loss": 0.4811, "step": 4588 }, { "epoch": 1.023644880660272, "grad_norm": 0.16079513728618622, "learning_rate": 1.4921669296621912e-05, "loss": 0.4742, "step": 4589 }, { "epoch": 1.0238679455721615, "grad_norm": 0.1617536097764969, "learning_rate": 1.4919620498238154e-05, "loss": 0.4818, "step": 4590 }, { "epoch": 1.0240910104840508, "grad_norm": 0.15949992835521698, "learning_rate": 1.4917571427374866e-05, "loss": 0.4697, "step": 4591 }, { "epoch": 1.0243140753959403, "grad_norm": 0.1637139767408371, "learning_rate": 1.4915522084145542e-05, "loss": 0.4507, "step": 4592 }, { "epoch": 1.0245371403078296, "grad_norm": 0.16350442171096802, "learning_rate": 1.4913472468663681e-05, "loss": 0.4676, "step": 4593 }, { "epoch": 1.024760205219719, "grad_norm": 0.1604386270046234, "learning_rate": 1.4911422581042812e-05, "loss": 0.4941, "step": 4594 }, { "epoch": 1.0249832701316084, "grad_norm": 0.16599829494953156, "learning_rate": 1.4909372421396464e-05, "loss": 0.486, "step": 4595 }, { "epoch": 1.0252063350434977, "grad_norm": 0.24195000529289246, "learning_rate": 1.4907321989838192e-05, "loss": 0.4824, "step": 4596 }, { "epoch": 1.025429399955387, "grad_norm": 0.1621350198984146, "learning_rate": 1.4905271286481557e-05, "loss": 0.4602, "step": 4597 }, { "epoch": 1.0256524648672765, "grad_norm": 0.16584086418151855, "learning_rate": 1.4903220311440147e-05, "loss": 0.4581, "step": 4598 }, { "epoch": 1.0258755297791657, "grad_norm": 0.16808323562145233, "learning_rate": 1.4901169064827552e-05, "loss": 0.4756, "step": 4599 }, { "epoch": 1.026098594691055, "grad_norm": 0.15748214721679688, "learning_rate": 1.4899117546757383e-05, "loss": 0.4366, "step": 4600 }, { "epoch": 1.0263216596029445, "grad_norm": 0.18983392417430878, "learning_rate": 1.489706575734327e-05, "loss": 0.4447, "step": 4601 }, { "epoch": 1.0265447245148338, "grad_norm": 0.16951000690460205, "learning_rate": 1.4895013696698847e-05, "loss": 0.4608, "step": 4602 }, { "epoch": 1.026767789426723, "grad_norm": 0.1630699634552002, "learning_rate": 1.4892961364937779e-05, "loss": 0.4773, "step": 4603 }, { "epoch": 1.0269908543386126, "grad_norm": 0.15806427597999573, "learning_rate": 1.4890908762173731e-05, "loss": 0.4579, "step": 4604 }, { "epoch": 1.0272139192505019, "grad_norm": 0.16593055427074432, "learning_rate": 1.4888855888520393e-05, "loss": 0.4622, "step": 4605 }, { "epoch": 1.0274369841623912, "grad_norm": 0.16501964628696442, "learning_rate": 1.488680274409146e-05, "loss": 0.4793, "step": 4606 }, { "epoch": 1.0276600490742807, "grad_norm": 0.1590343415737152, "learning_rate": 1.4884749329000654e-05, "loss": 0.4562, "step": 4607 }, { "epoch": 1.02788311398617, "grad_norm": 0.1620016247034073, "learning_rate": 1.4882695643361702e-05, "loss": 0.4836, "step": 4608 }, { "epoch": 1.0281061788980594, "grad_norm": 0.1608029156923294, "learning_rate": 1.4880641687288356e-05, "loss": 0.4819, "step": 4609 }, { "epoch": 1.0283292438099487, "grad_norm": 0.18065601587295532, "learning_rate": 1.4878587460894369e-05, "loss": 0.4966, "step": 4610 }, { "epoch": 1.028552308721838, "grad_norm": 0.16342510282993317, "learning_rate": 1.4876532964293522e-05, "loss": 0.504, "step": 4611 }, { "epoch": 1.0287753736337275, "grad_norm": 0.16731272637844086, "learning_rate": 1.4874478197599605e-05, "loss": 0.4517, "step": 4612 }, { "epoch": 1.0289984385456168, "grad_norm": 0.17315874993801117, "learning_rate": 1.4872423160926424e-05, "loss": 0.4704, "step": 4613 }, { "epoch": 1.029221503457506, "grad_norm": 0.16523095965385437, "learning_rate": 1.48703678543878e-05, "loss": 0.4943, "step": 4614 }, { "epoch": 1.0294445683693956, "grad_norm": 0.1586511880159378, "learning_rate": 1.4868312278097568e-05, "loss": 0.486, "step": 4615 }, { "epoch": 1.0296676332812849, "grad_norm": 0.167024627327919, "learning_rate": 1.4866256432169577e-05, "loss": 0.4898, "step": 4616 }, { "epoch": 1.0298906981931741, "grad_norm": 0.16019749641418457, "learning_rate": 1.4864200316717698e-05, "loss": 0.4738, "step": 4617 }, { "epoch": 1.0301137631050636, "grad_norm": 0.16836729645729065, "learning_rate": 1.4862143931855806e-05, "loss": 0.4597, "step": 4618 }, { "epoch": 1.030336828016953, "grad_norm": 0.1572437733411789, "learning_rate": 1.4860087277697798e-05, "loss": 0.495, "step": 4619 }, { "epoch": 1.0305598929288422, "grad_norm": 0.1620069444179535, "learning_rate": 1.4858030354357588e-05, "loss": 0.4681, "step": 4620 }, { "epoch": 1.0307829578407317, "grad_norm": 0.1601821482181549, "learning_rate": 1.4855973161949097e-05, "loss": 0.4803, "step": 4621 }, { "epoch": 1.031006022752621, "grad_norm": 0.16902416944503784, "learning_rate": 1.485391570058627e-05, "loss": 0.479, "step": 4622 }, { "epoch": 1.0312290876645103, "grad_norm": 0.15522709488868713, "learning_rate": 1.4851857970383057e-05, "loss": 0.4737, "step": 4623 }, { "epoch": 1.0314521525763998, "grad_norm": 0.15457487106323242, "learning_rate": 1.484979997145343e-05, "loss": 0.4663, "step": 4624 }, { "epoch": 1.031675217488289, "grad_norm": 0.16563165187835693, "learning_rate": 1.4847741703911376e-05, "loss": 0.4754, "step": 4625 }, { "epoch": 1.0318982824001786, "grad_norm": 0.16526126861572266, "learning_rate": 1.4845683167870891e-05, "loss": 0.4743, "step": 4626 }, { "epoch": 1.0321213473120678, "grad_norm": 0.1640041321516037, "learning_rate": 1.4843624363445992e-05, "loss": 0.4995, "step": 4627 }, { "epoch": 1.0323444122239571, "grad_norm": 0.17919553816318512, "learning_rate": 1.484156529075071e-05, "loss": 0.4622, "step": 4628 }, { "epoch": 1.0325674771358466, "grad_norm": 0.1707811802625656, "learning_rate": 1.4839505949899084e-05, "loss": 0.5257, "step": 4629 }, { "epoch": 1.032790542047736, "grad_norm": 0.16861692070960999, "learning_rate": 1.4837446341005179e-05, "loss": 0.4661, "step": 4630 }, { "epoch": 1.0330136069596252, "grad_norm": 0.15601199865341187, "learning_rate": 1.4835386464183066e-05, "loss": 0.4704, "step": 4631 }, { "epoch": 1.0332366718715147, "grad_norm": 0.16105563938617706, "learning_rate": 1.4833326319546837e-05, "loss": 0.4822, "step": 4632 }, { "epoch": 1.033459736783404, "grad_norm": 0.1516101360321045, "learning_rate": 1.483126590721059e-05, "loss": 0.4812, "step": 4633 }, { "epoch": 1.0336828016952933, "grad_norm": 0.16206029057502747, "learning_rate": 1.4829205227288451e-05, "loss": 0.4827, "step": 4634 }, { "epoch": 1.0339058666071828, "grad_norm": 0.16983060538768768, "learning_rate": 1.4827144279894547e-05, "loss": 0.4507, "step": 4635 }, { "epoch": 1.034128931519072, "grad_norm": 0.17058612406253815, "learning_rate": 1.4825083065143029e-05, "loss": 0.4606, "step": 4636 }, { "epoch": 1.0343519964309613, "grad_norm": 0.15379010140895844, "learning_rate": 1.482302158314806e-05, "loss": 0.4365, "step": 4637 }, { "epoch": 1.0345750613428508, "grad_norm": 0.1577761024236679, "learning_rate": 1.4820959834023821e-05, "loss": 0.4678, "step": 4638 }, { "epoch": 1.03479812625474, "grad_norm": 0.16535353660583496, "learning_rate": 1.4818897817884499e-05, "loss": 0.4994, "step": 4639 }, { "epoch": 1.0350211911666294, "grad_norm": 0.16018177568912506, "learning_rate": 1.48168355348443e-05, "loss": 0.486, "step": 4640 }, { "epoch": 1.0352442560785189, "grad_norm": 0.16746951639652252, "learning_rate": 1.4814772985017456e-05, "loss": 0.4665, "step": 4641 }, { "epoch": 1.0354673209904082, "grad_norm": 0.1544388234615326, "learning_rate": 1.4812710168518195e-05, "loss": 0.4707, "step": 4642 }, { "epoch": 1.0356903859022977, "grad_norm": 0.15881603956222534, "learning_rate": 1.4810647085460771e-05, "loss": 0.4727, "step": 4643 }, { "epoch": 1.035913450814187, "grad_norm": 0.15742497146129608, "learning_rate": 1.4808583735959453e-05, "loss": 0.4864, "step": 4644 }, { "epoch": 1.0361365157260762, "grad_norm": 0.15278148651123047, "learning_rate": 1.4806520120128518e-05, "loss": 0.4586, "step": 4645 }, { "epoch": 1.0363595806379657, "grad_norm": 0.16195416450500488, "learning_rate": 1.4804456238082266e-05, "loss": 0.4532, "step": 4646 }, { "epoch": 1.036582645549855, "grad_norm": 0.17061328887939453, "learning_rate": 1.480239208993501e-05, "loss": 0.4568, "step": 4647 }, { "epoch": 1.0368057104617443, "grad_norm": 0.16437166929244995, "learning_rate": 1.4800327675801065e-05, "loss": 0.4751, "step": 4648 }, { "epoch": 1.0370287753736338, "grad_norm": 0.15695516765117645, "learning_rate": 1.4798262995794784e-05, "loss": 0.4701, "step": 4649 }, { "epoch": 1.037251840285523, "grad_norm": 0.16812454164028168, "learning_rate": 1.479619805003051e-05, "loss": 0.4543, "step": 4650 }, { "epoch": 1.0374749051974124, "grad_norm": 0.16520899534225464, "learning_rate": 1.4794132838622624e-05, "loss": 0.4576, "step": 4651 }, { "epoch": 1.0376979701093019, "grad_norm": 0.155088871717453, "learning_rate": 1.4792067361685503e-05, "loss": 0.4775, "step": 4652 }, { "epoch": 1.0379210350211912, "grad_norm": 0.15887859463691711, "learning_rate": 1.4790001619333547e-05, "loss": 0.4642, "step": 4653 }, { "epoch": 1.0381440999330804, "grad_norm": 0.17838329076766968, "learning_rate": 1.4787935611681174e-05, "loss": 0.4821, "step": 4654 }, { "epoch": 1.03836716484497, "grad_norm": 0.15603458881378174, "learning_rate": 1.4785869338842807e-05, "loss": 0.4581, "step": 4655 }, { "epoch": 1.0385902297568592, "grad_norm": 0.16940052807331085, "learning_rate": 1.4783802800932894e-05, "loss": 0.5005, "step": 4656 }, { "epoch": 1.0388132946687485, "grad_norm": 0.1598014533519745, "learning_rate": 1.4781735998065886e-05, "loss": 0.4866, "step": 4657 }, { "epoch": 1.039036359580638, "grad_norm": 0.1550016701221466, "learning_rate": 1.4779668930356265e-05, "loss": 0.4531, "step": 4658 }, { "epoch": 1.0392594244925273, "grad_norm": 0.17037729918956757, "learning_rate": 1.4777601597918511e-05, "loss": 0.4867, "step": 4659 }, { "epoch": 1.0394824894044168, "grad_norm": 0.15960454940795898, "learning_rate": 1.477553400086713e-05, "loss": 0.4768, "step": 4660 }, { "epoch": 1.039705554316306, "grad_norm": 0.17182455956935883, "learning_rate": 1.4773466139316634e-05, "loss": 0.4833, "step": 4661 }, { "epoch": 1.0399286192281954, "grad_norm": 0.1634978950023651, "learning_rate": 1.4771398013381559e-05, "loss": 0.4591, "step": 4662 }, { "epoch": 1.0401516841400849, "grad_norm": 0.1541091501712799, "learning_rate": 1.476932962317645e-05, "loss": 0.4611, "step": 4663 }, { "epoch": 1.0403747490519741, "grad_norm": 0.16876503825187683, "learning_rate": 1.4767260968815864e-05, "loss": 0.497, "step": 4664 }, { "epoch": 1.0405978139638634, "grad_norm": 0.16533160209655762, "learning_rate": 1.4765192050414378e-05, "loss": 0.4818, "step": 4665 }, { "epoch": 1.040820878875753, "grad_norm": 0.1572985202074051, "learning_rate": 1.4763122868086584e-05, "loss": 0.4748, "step": 4666 }, { "epoch": 1.0410439437876422, "grad_norm": 0.16402465105056763, "learning_rate": 1.4761053421947082e-05, "loss": 0.4819, "step": 4667 }, { "epoch": 1.0412670086995315, "grad_norm": 0.15840837359428406, "learning_rate": 1.4758983712110494e-05, "loss": 0.4781, "step": 4668 }, { "epoch": 1.041490073611421, "grad_norm": 0.1562443971633911, "learning_rate": 1.4756913738691451e-05, "loss": 0.4784, "step": 4669 }, { "epoch": 1.0417131385233103, "grad_norm": 0.1598626971244812, "learning_rate": 1.4754843501804607e-05, "loss": 0.4846, "step": 4670 }, { "epoch": 1.0419362034351995, "grad_norm": 0.1586054414510727, "learning_rate": 1.4752773001564617e-05, "loss": 0.4819, "step": 4671 }, { "epoch": 1.042159268347089, "grad_norm": 0.16802629828453064, "learning_rate": 1.4750702238086164e-05, "loss": 0.4851, "step": 4672 }, { "epoch": 1.0423823332589783, "grad_norm": 0.16435806453227997, "learning_rate": 1.474863121148394e-05, "loss": 0.4617, "step": 4673 }, { "epoch": 1.0426053981708678, "grad_norm": 0.1635981649160385, "learning_rate": 1.4746559921872645e-05, "loss": 0.4708, "step": 4674 }, { "epoch": 1.0428284630827571, "grad_norm": 0.1575925201177597, "learning_rate": 1.4744488369367007e-05, "loss": 0.4662, "step": 4675 }, { "epoch": 1.0430515279946464, "grad_norm": 0.16302283108234406, "learning_rate": 1.474241655408176e-05, "loss": 0.4632, "step": 4676 }, { "epoch": 1.043274592906536, "grad_norm": 0.1619601547718048, "learning_rate": 1.4740344476131652e-05, "loss": 0.4489, "step": 4677 }, { "epoch": 1.0434976578184252, "grad_norm": 0.16791771352291107, "learning_rate": 1.4738272135631448e-05, "loss": 0.5023, "step": 4678 }, { "epoch": 1.0437207227303145, "grad_norm": 0.16237159073352814, "learning_rate": 1.4736199532695929e-05, "loss": 0.4603, "step": 4679 }, { "epoch": 1.043943787642204, "grad_norm": 0.1556091606616974, "learning_rate": 1.473412666743989e-05, "loss": 0.4466, "step": 4680 }, { "epoch": 1.0441668525540932, "grad_norm": 0.15769942104816437, "learning_rate": 1.4732053539978138e-05, "loss": 0.4532, "step": 4681 }, { "epoch": 1.0443899174659825, "grad_norm": 0.15660084784030914, "learning_rate": 1.472998015042549e-05, "loss": 0.4862, "step": 4682 }, { "epoch": 1.044612982377872, "grad_norm": 0.15516437590122223, "learning_rate": 1.4727906498896793e-05, "loss": 0.4756, "step": 4683 }, { "epoch": 1.0448360472897613, "grad_norm": 0.16473138332366943, "learning_rate": 1.4725832585506891e-05, "loss": 0.4937, "step": 4684 }, { "epoch": 1.0450591122016506, "grad_norm": 0.15583591163158417, "learning_rate": 1.4723758410370654e-05, "loss": 0.4732, "step": 4685 }, { "epoch": 1.04528217711354, "grad_norm": 0.15941470861434937, "learning_rate": 1.4721683973602965e-05, "loss": 0.4457, "step": 4686 }, { "epoch": 1.0455052420254294, "grad_norm": 0.1659325659275055, "learning_rate": 1.4719609275318715e-05, "loss": 0.4989, "step": 4687 }, { "epoch": 1.0457283069373187, "grad_norm": 0.1708594560623169, "learning_rate": 1.4717534315632817e-05, "loss": 0.4833, "step": 4688 }, { "epoch": 1.0459513718492082, "grad_norm": 0.1607704609632492, "learning_rate": 1.4715459094660194e-05, "loss": 0.4419, "step": 4689 }, { "epoch": 1.0461744367610974, "grad_norm": 0.16482210159301758, "learning_rate": 1.4713383612515786e-05, "loss": 0.4686, "step": 4690 }, { "epoch": 1.046397501672987, "grad_norm": 0.16981108486652374, "learning_rate": 1.4711307869314544e-05, "loss": 0.4961, "step": 4691 }, { "epoch": 1.0466205665848762, "grad_norm": 0.16742509603500366, "learning_rate": 1.4709231865171436e-05, "loss": 0.4972, "step": 4692 }, { "epoch": 1.0468436314967655, "grad_norm": 0.16613337397575378, "learning_rate": 1.4707155600201447e-05, "loss": 0.4803, "step": 4693 }, { "epoch": 1.047066696408655, "grad_norm": 0.18546496331691742, "learning_rate": 1.470507907451957e-05, "loss": 0.4678, "step": 4694 }, { "epoch": 1.0472897613205443, "grad_norm": 0.1629878729581833, "learning_rate": 1.4703002288240818e-05, "loss": 0.4479, "step": 4695 }, { "epoch": 1.0475128262324336, "grad_norm": 0.1560726761817932, "learning_rate": 1.4700925241480217e-05, "loss": 0.451, "step": 4696 }, { "epoch": 1.047735891144323, "grad_norm": 0.458696573972702, "learning_rate": 1.4698847934352804e-05, "loss": 0.493, "step": 4697 }, { "epoch": 1.0479589560562124, "grad_norm": 0.1607166975736618, "learning_rate": 1.4696770366973639e-05, "loss": 0.4688, "step": 4698 }, { "epoch": 1.0481820209681016, "grad_norm": 0.15131288766860962, "learning_rate": 1.4694692539457784e-05, "loss": 0.4551, "step": 4699 }, { "epoch": 1.0484050858799911, "grad_norm": 0.16862881183624268, "learning_rate": 1.4692614451920328e-05, "loss": 0.4579, "step": 4700 }, { "epoch": 1.0486281507918804, "grad_norm": 0.16511371731758118, "learning_rate": 1.4690536104476364e-05, "loss": 0.4887, "step": 4701 }, { "epoch": 1.0488512157037697, "grad_norm": 0.16970515251159668, "learning_rate": 1.4688457497241006e-05, "loss": 0.4494, "step": 4702 }, { "epoch": 1.0490742806156592, "grad_norm": 0.16722868382930756, "learning_rate": 1.4686378630329382e-05, "loss": 0.4781, "step": 4703 }, { "epoch": 1.0492973455275485, "grad_norm": 0.16638034582138062, "learning_rate": 1.4684299503856627e-05, "loss": 0.4705, "step": 4704 }, { "epoch": 1.0495204104394378, "grad_norm": 0.18224425613880157, "learning_rate": 1.4682220117937904e-05, "loss": 0.4902, "step": 4705 }, { "epoch": 1.0497434753513273, "grad_norm": 0.15631026029586792, "learning_rate": 1.4680140472688376e-05, "loss": 0.448, "step": 4706 }, { "epoch": 1.0499665402632166, "grad_norm": 0.15947161614894867, "learning_rate": 1.4678060568223232e-05, "loss": 0.453, "step": 4707 }, { "epoch": 1.050189605175106, "grad_norm": 0.15899132192134857, "learning_rate": 1.4675980404657666e-05, "loss": 0.4594, "step": 4708 }, { "epoch": 1.0504126700869953, "grad_norm": 0.1654270440340042, "learning_rate": 1.4673899982106892e-05, "loss": 0.482, "step": 4709 }, { "epoch": 1.0506357349988846, "grad_norm": 0.16609270870685577, "learning_rate": 1.4671819300686136e-05, "loss": 0.5132, "step": 4710 }, { "epoch": 1.0508587999107741, "grad_norm": 0.15867236256599426, "learning_rate": 1.4669738360510643e-05, "loss": 0.4499, "step": 4711 }, { "epoch": 1.0510818648226634, "grad_norm": 0.1547602266073227, "learning_rate": 1.4667657161695663e-05, "loss": 0.4438, "step": 4712 }, { "epoch": 1.0513049297345527, "grad_norm": 0.1628340631723404, "learning_rate": 1.4665575704356472e-05, "loss": 0.4922, "step": 4713 }, { "epoch": 1.0515279946464422, "grad_norm": 0.15342377126216888, "learning_rate": 1.4663493988608348e-05, "loss": 0.4448, "step": 4714 }, { "epoch": 1.0517510595583315, "grad_norm": 0.15994024276733398, "learning_rate": 1.4661412014566594e-05, "loss": 0.4655, "step": 4715 }, { "epoch": 1.0519741244702208, "grad_norm": 0.17778268456459045, "learning_rate": 1.4659329782346518e-05, "loss": 0.5056, "step": 4716 }, { "epoch": 1.0521971893821103, "grad_norm": 0.16450022161006927, "learning_rate": 1.4657247292063455e-05, "loss": 0.4914, "step": 4717 }, { "epoch": 1.0524202542939995, "grad_norm": 0.15438216924667358, "learning_rate": 1.4655164543832738e-05, "loss": 0.4513, "step": 4718 }, { "epoch": 1.0526433192058888, "grad_norm": 0.165638267993927, "learning_rate": 1.4653081537769729e-05, "loss": 0.487, "step": 4719 }, { "epoch": 1.0528663841177783, "grad_norm": 0.1668267548084259, "learning_rate": 1.4650998273989794e-05, "loss": 0.4936, "step": 4720 }, { "epoch": 1.0530894490296676, "grad_norm": 0.19577254354953766, "learning_rate": 1.464891475260832e-05, "loss": 0.4849, "step": 4721 }, { "epoch": 1.053312513941557, "grad_norm": 0.16912342607975006, "learning_rate": 1.4646830973740703e-05, "loss": 0.4712, "step": 4722 }, { "epoch": 1.0535355788534464, "grad_norm": 0.17454881966114044, "learning_rate": 1.4644746937502356e-05, "loss": 0.4945, "step": 4723 }, { "epoch": 1.0537586437653357, "grad_norm": 0.16234014928340912, "learning_rate": 1.464266264400871e-05, "loss": 0.4644, "step": 4724 }, { "epoch": 1.0539817086772252, "grad_norm": 0.16444821655750275, "learning_rate": 1.46405780933752e-05, "loss": 0.4698, "step": 4725 }, { "epoch": 1.0542047735891145, "grad_norm": 0.15134121477603912, "learning_rate": 1.4638493285717286e-05, "loss": 0.4545, "step": 4726 }, { "epoch": 1.0544278385010037, "grad_norm": 0.16439802944660187, "learning_rate": 1.4636408221150436e-05, "loss": 0.4881, "step": 4727 }, { "epoch": 1.0546509034128932, "grad_norm": 0.16219298541545868, "learning_rate": 1.4634322899790137e-05, "loss": 0.4501, "step": 4728 }, { "epoch": 1.0548739683247825, "grad_norm": 0.1483144313097, "learning_rate": 1.463223732175188e-05, "loss": 0.4461, "step": 4729 }, { "epoch": 1.0550970332366718, "grad_norm": 0.16766944527626038, "learning_rate": 1.4630151487151188e-05, "loss": 0.4742, "step": 4730 }, { "epoch": 1.0553200981485613, "grad_norm": 0.17085810005664825, "learning_rate": 1.4628065396103576e-05, "loss": 0.4917, "step": 4731 }, { "epoch": 1.0555431630604506, "grad_norm": 0.18476352095603943, "learning_rate": 1.4625979048724594e-05, "loss": 0.4518, "step": 4732 }, { "epoch": 1.0557662279723399, "grad_norm": 0.1589956432580948, "learning_rate": 1.4623892445129792e-05, "loss": 0.4732, "step": 4733 }, { "epoch": 1.0559892928842294, "grad_norm": 0.18277813494205475, "learning_rate": 1.4621805585434744e-05, "loss": 0.4752, "step": 4734 }, { "epoch": 1.0562123577961187, "grad_norm": 0.15718622505664825, "learning_rate": 1.4619718469755029e-05, "loss": 0.4815, "step": 4735 }, { "epoch": 1.056435422708008, "grad_norm": 0.1619989573955536, "learning_rate": 1.4617631098206244e-05, "loss": 0.4595, "step": 4736 }, { "epoch": 1.0566584876198974, "grad_norm": 0.16704005002975464, "learning_rate": 1.4615543470904005e-05, "loss": 0.4769, "step": 4737 }, { "epoch": 1.0568815525317867, "grad_norm": 0.16695329546928406, "learning_rate": 1.4613455587963934e-05, "loss": 0.4868, "step": 4738 }, { "epoch": 1.057104617443676, "grad_norm": 0.165815070271492, "learning_rate": 1.4611367449501674e-05, "loss": 0.4601, "step": 4739 }, { "epoch": 1.0573276823555655, "grad_norm": 0.1685144156217575, "learning_rate": 1.4609279055632878e-05, "loss": 0.4848, "step": 4740 }, { "epoch": 1.0575507472674548, "grad_norm": 0.16440360248088837, "learning_rate": 1.4607190406473214e-05, "loss": 0.4729, "step": 4741 }, { "epoch": 1.0577738121793443, "grad_norm": 0.16086186468601227, "learning_rate": 1.4605101502138363e-05, "loss": 0.4719, "step": 4742 }, { "epoch": 1.0579968770912336, "grad_norm": 0.16252481937408447, "learning_rate": 1.4603012342744027e-05, "loss": 0.4739, "step": 4743 }, { "epoch": 1.0582199420031229, "grad_norm": 0.15817517042160034, "learning_rate": 1.4600922928405911e-05, "loss": 0.4982, "step": 4744 }, { "epoch": 1.0584430069150124, "grad_norm": 0.15263493359088898, "learning_rate": 1.4598833259239746e-05, "loss": 0.4645, "step": 4745 }, { "epoch": 1.0586660718269016, "grad_norm": 0.2680174708366394, "learning_rate": 1.4596743335361263e-05, "loss": 0.4876, "step": 4746 }, { "epoch": 1.058889136738791, "grad_norm": 0.16394802927970886, "learning_rate": 1.4594653156886222e-05, "loss": 0.4812, "step": 4747 }, { "epoch": 1.0591122016506804, "grad_norm": 0.1608896255493164, "learning_rate": 1.4592562723930385e-05, "loss": 0.4755, "step": 4748 }, { "epoch": 1.0593352665625697, "grad_norm": 0.17238877713680267, "learning_rate": 1.459047203660954e-05, "loss": 0.4926, "step": 4749 }, { "epoch": 1.059558331474459, "grad_norm": 0.16521191596984863, "learning_rate": 1.4588381095039474e-05, "loss": 0.4937, "step": 4750 }, { "epoch": 1.0597813963863485, "grad_norm": 0.1620710790157318, "learning_rate": 1.4586289899336003e-05, "loss": 0.4339, "step": 4751 }, { "epoch": 1.0600044612982378, "grad_norm": 0.15963472425937653, "learning_rate": 1.4584198449614947e-05, "loss": 0.4546, "step": 4752 }, { "epoch": 1.060227526210127, "grad_norm": 0.17011800408363342, "learning_rate": 1.4582106745992149e-05, "loss": 0.4761, "step": 4753 }, { "epoch": 1.0604505911220166, "grad_norm": 0.15755783021450043, "learning_rate": 1.4580014788583452e-05, "loss": 0.4621, "step": 4754 }, { "epoch": 1.0606736560339058, "grad_norm": 0.1612185388803482, "learning_rate": 1.457792257750473e-05, "loss": 0.4526, "step": 4755 }, { "epoch": 1.0608967209457951, "grad_norm": 0.17329202592372894, "learning_rate": 1.4575830112871855e-05, "loss": 0.472, "step": 4756 }, { "epoch": 1.0611197858576846, "grad_norm": 0.16672907769680023, "learning_rate": 1.4573737394800729e-05, "loss": 0.4898, "step": 4757 }, { "epoch": 1.061342850769574, "grad_norm": 0.17075766623020172, "learning_rate": 1.4571644423407257e-05, "loss": 0.474, "step": 4758 }, { "epoch": 1.0615659156814634, "grad_norm": 0.15704765915870667, "learning_rate": 1.4569551198807357e-05, "loss": 0.4951, "step": 4759 }, { "epoch": 1.0617889805933527, "grad_norm": 0.1504676192998886, "learning_rate": 1.4567457721116971e-05, "loss": 0.4597, "step": 4760 }, { "epoch": 1.062012045505242, "grad_norm": 0.15645162761211395, "learning_rate": 1.4565363990452046e-05, "loss": 0.4548, "step": 4761 }, { "epoch": 1.0622351104171315, "grad_norm": 0.16380253434181213, "learning_rate": 1.4563270006928544e-05, "loss": 0.4675, "step": 4762 }, { "epoch": 1.0624581753290208, "grad_norm": 0.16168953478336334, "learning_rate": 1.4561175770662446e-05, "loss": 0.5015, "step": 4763 }, { "epoch": 1.06268124024091, "grad_norm": 0.16128864884376526, "learning_rate": 1.4559081281769742e-05, "loss": 0.4827, "step": 4764 }, { "epoch": 1.0629043051527995, "grad_norm": 0.1598096489906311, "learning_rate": 1.4556986540366437e-05, "loss": 0.4598, "step": 4765 }, { "epoch": 1.0631273700646888, "grad_norm": 0.1796981692314148, "learning_rate": 1.4554891546568557e-05, "loss": 0.4803, "step": 4766 }, { "epoch": 1.063350434976578, "grad_norm": 0.15178261697292328, "learning_rate": 1.4552796300492129e-05, "loss": 0.4228, "step": 4767 }, { "epoch": 1.0635734998884676, "grad_norm": 0.15565361082553864, "learning_rate": 1.4550700802253203e-05, "loss": 0.484, "step": 4768 }, { "epoch": 1.063796564800357, "grad_norm": 0.16248026490211487, "learning_rate": 1.4548605051967843e-05, "loss": 0.448, "step": 4769 }, { "epoch": 1.0640196297122462, "grad_norm": 0.17214354872703552, "learning_rate": 1.4546509049752122e-05, "loss": 0.466, "step": 4770 }, { "epoch": 1.0642426946241357, "grad_norm": 0.1753188818693161, "learning_rate": 1.4544412795722135e-05, "loss": 0.4861, "step": 4771 }, { "epoch": 1.064465759536025, "grad_norm": 0.16392984986305237, "learning_rate": 1.4542316289993976e-05, "loss": 0.4584, "step": 4772 }, { "epoch": 1.0646888244479142, "grad_norm": 0.16576439142227173, "learning_rate": 1.4540219532683774e-05, "loss": 0.456, "step": 4773 }, { "epoch": 1.0649118893598037, "grad_norm": 0.17842838168144226, "learning_rate": 1.4538122523907651e-05, "loss": 0.471, "step": 4774 }, { "epoch": 1.065134954271693, "grad_norm": 0.15883475542068481, "learning_rate": 1.4536025263781762e-05, "loss": 0.4868, "step": 4775 }, { "epoch": 1.0653580191835825, "grad_norm": 0.15571808815002441, "learning_rate": 1.4533927752422256e-05, "loss": 0.4742, "step": 4776 }, { "epoch": 1.0655810840954718, "grad_norm": 0.16916531324386597, "learning_rate": 1.4531829989945315e-05, "loss": 0.4646, "step": 4777 }, { "epoch": 1.065804149007361, "grad_norm": 0.15617609024047852, "learning_rate": 1.4529731976467119e-05, "loss": 0.4569, "step": 4778 }, { "epoch": 1.0660272139192506, "grad_norm": 0.15956276655197144, "learning_rate": 1.4527633712103875e-05, "loss": 0.4739, "step": 4779 }, { "epoch": 1.0662502788311399, "grad_norm": 0.1674896627664566, "learning_rate": 1.4525535196971797e-05, "loss": 0.4993, "step": 4780 }, { "epoch": 1.0664733437430292, "grad_norm": 0.14718051254749298, "learning_rate": 1.4523436431187112e-05, "loss": 0.4433, "step": 4781 }, { "epoch": 1.0666964086549187, "grad_norm": 0.16965600848197937, "learning_rate": 1.4521337414866064e-05, "loss": 0.496, "step": 4782 }, { "epoch": 1.066919473566808, "grad_norm": 0.16213834285736084, "learning_rate": 1.451923814812491e-05, "loss": 0.4385, "step": 4783 }, { "epoch": 1.0671425384786972, "grad_norm": 0.16705723106861115, "learning_rate": 1.451713863107992e-05, "loss": 0.4893, "step": 4784 }, { "epoch": 1.0673656033905867, "grad_norm": 0.16360625624656677, "learning_rate": 1.451503886384738e-05, "loss": 0.5065, "step": 4785 }, { "epoch": 1.067588668302476, "grad_norm": 0.1604512631893158, "learning_rate": 1.4512938846543583e-05, "loss": 0.4597, "step": 4786 }, { "epoch": 1.0678117332143653, "grad_norm": 0.16134783625602722, "learning_rate": 1.4510838579284849e-05, "loss": 0.467, "step": 4787 }, { "epoch": 1.0680347981262548, "grad_norm": 0.1614135503768921, "learning_rate": 1.4508738062187497e-05, "loss": 0.482, "step": 4788 }, { "epoch": 1.068257863038144, "grad_norm": 0.16068898141384125, "learning_rate": 1.4506637295367872e-05, "loss": 0.4557, "step": 4789 }, { "epoch": 1.0684809279500334, "grad_norm": 0.18150994181632996, "learning_rate": 1.4504536278942327e-05, "loss": 0.4796, "step": 4790 }, { "epoch": 1.0687039928619229, "grad_norm": 0.16428856551647186, "learning_rate": 1.4502435013027225e-05, "loss": 0.478, "step": 4791 }, { "epoch": 1.0689270577738121, "grad_norm": 0.16501575708389282, "learning_rate": 1.4500333497738955e-05, "loss": 0.4575, "step": 4792 }, { "epoch": 1.0691501226857016, "grad_norm": 0.16125361621379852, "learning_rate": 1.4498231733193904e-05, "loss": 0.4713, "step": 4793 }, { "epoch": 1.069373187597591, "grad_norm": 0.15834558010101318, "learning_rate": 1.4496129719508486e-05, "loss": 0.4721, "step": 4794 }, { "epoch": 1.0695962525094802, "grad_norm": 0.17870576679706573, "learning_rate": 1.449402745679912e-05, "loss": 0.4885, "step": 4795 }, { "epoch": 1.0698193174213697, "grad_norm": 0.15810585021972656, "learning_rate": 1.4491924945182248e-05, "loss": 0.4698, "step": 4796 }, { "epoch": 1.070042382333259, "grad_norm": 0.17196185886859894, "learning_rate": 1.4489822184774317e-05, "loss": 0.4946, "step": 4797 }, { "epoch": 1.0702654472451483, "grad_norm": 0.1619616448879242, "learning_rate": 1.448771917569179e-05, "loss": 0.4697, "step": 4798 }, { "epoch": 1.0704885121570378, "grad_norm": 0.1760883778333664, "learning_rate": 1.4485615918051146e-05, "loss": 0.4533, "step": 4799 }, { "epoch": 1.070711577068927, "grad_norm": 0.15732388198375702, "learning_rate": 1.448351241196888e-05, "loss": 0.4639, "step": 4800 }, { "epoch": 1.0709346419808163, "grad_norm": 0.1595136672258377, "learning_rate": 1.448140865756149e-05, "loss": 0.48, "step": 4801 }, { "epoch": 1.0711577068927058, "grad_norm": 0.19920584559440613, "learning_rate": 1.44793046549455e-05, "loss": 0.4647, "step": 4802 }, { "epoch": 1.0713807718045951, "grad_norm": 0.16085761785507202, "learning_rate": 1.4477200404237446e-05, "loss": 0.4788, "step": 4803 }, { "epoch": 1.0716038367164844, "grad_norm": 0.15879055857658386, "learning_rate": 1.4475095905553867e-05, "loss": 0.4805, "step": 4804 }, { "epoch": 1.071826901628374, "grad_norm": 0.16476532816886902, "learning_rate": 1.4472991159011329e-05, "loss": 0.4996, "step": 4805 }, { "epoch": 1.0720499665402632, "grad_norm": 0.1692475974559784, "learning_rate": 1.4470886164726403e-05, "loss": 0.464, "step": 4806 }, { "epoch": 1.0722730314521525, "grad_norm": 0.16641223430633545, "learning_rate": 1.4468780922815679e-05, "loss": 0.4698, "step": 4807 }, { "epoch": 1.072496096364042, "grad_norm": 0.15899412333965302, "learning_rate": 1.4466675433395758e-05, "loss": 0.4688, "step": 4808 }, { "epoch": 1.0727191612759313, "grad_norm": 0.1671571284532547, "learning_rate": 1.4464569696583256e-05, "loss": 0.4948, "step": 4809 }, { "epoch": 1.0729422261878208, "grad_norm": 0.16346615552902222, "learning_rate": 1.4462463712494799e-05, "loss": 0.4682, "step": 4810 }, { "epoch": 1.07316529109971, "grad_norm": 0.16411162912845612, "learning_rate": 1.4460357481247035e-05, "loss": 0.4704, "step": 4811 }, { "epoch": 1.0733883560115993, "grad_norm": 0.16444380581378937, "learning_rate": 1.4458251002956612e-05, "loss": 0.478, "step": 4812 }, { "epoch": 1.0736114209234888, "grad_norm": 0.15539826452732086, "learning_rate": 1.4456144277740207e-05, "loss": 0.4827, "step": 4813 }, { "epoch": 1.073834485835378, "grad_norm": 0.16206075251102448, "learning_rate": 1.4454037305714501e-05, "loss": 0.4651, "step": 4814 }, { "epoch": 1.0740575507472674, "grad_norm": 0.1546122431755066, "learning_rate": 1.4451930086996193e-05, "loss": 0.4668, "step": 4815 }, { "epoch": 1.074280615659157, "grad_norm": 0.15941190719604492, "learning_rate": 1.4449822621701992e-05, "loss": 0.4674, "step": 4816 }, { "epoch": 1.0745036805710462, "grad_norm": 0.16339150071144104, "learning_rate": 1.4447714909948624e-05, "loss": 0.4957, "step": 4817 }, { "epoch": 1.0747267454829355, "grad_norm": 0.15850695967674255, "learning_rate": 1.4445606951852828e-05, "loss": 0.487, "step": 4818 }, { "epoch": 1.074949810394825, "grad_norm": 0.16682858765125275, "learning_rate": 1.4443498747531358e-05, "loss": 0.4677, "step": 4819 }, { "epoch": 1.0751728753067142, "grad_norm": 0.1573875993490219, "learning_rate": 1.444139029710097e-05, "loss": 0.4452, "step": 4820 }, { "epoch": 1.0753959402186035, "grad_norm": 0.1620924174785614, "learning_rate": 1.4439281600678455e-05, "loss": 0.4767, "step": 4821 }, { "epoch": 1.075619005130493, "grad_norm": 0.15519385039806366, "learning_rate": 1.4437172658380598e-05, "loss": 0.4571, "step": 4822 }, { "epoch": 1.0758420700423823, "grad_norm": 0.17758627235889435, "learning_rate": 1.443506347032421e-05, "loss": 0.4767, "step": 4823 }, { "epoch": 1.0760651349542716, "grad_norm": 0.15553906559944153, "learning_rate": 1.4432954036626108e-05, "loss": 0.4659, "step": 4824 }, { "epoch": 1.076288199866161, "grad_norm": 0.15930281579494476, "learning_rate": 1.4430844357403126e-05, "loss": 0.4579, "step": 4825 }, { "epoch": 1.0765112647780504, "grad_norm": 0.15948422253131866, "learning_rate": 1.4428734432772115e-05, "loss": 0.4682, "step": 4826 }, { "epoch": 1.0767343296899399, "grad_norm": 0.15884706377983093, "learning_rate": 1.4426624262849932e-05, "loss": 0.469, "step": 4827 }, { "epoch": 1.0769573946018292, "grad_norm": 0.15958735346794128, "learning_rate": 1.4424513847753455e-05, "loss": 0.4576, "step": 4828 }, { "epoch": 1.0771804595137184, "grad_norm": 0.16710686683654785, "learning_rate": 1.4422403187599565e-05, "loss": 0.471, "step": 4829 }, { "epoch": 1.077403524425608, "grad_norm": 0.2897154688835144, "learning_rate": 1.4420292282505173e-05, "loss": 0.4901, "step": 4830 }, { "epoch": 1.0776265893374972, "grad_norm": 0.1648949235677719, "learning_rate": 1.4418181132587185e-05, "loss": 0.4867, "step": 4831 }, { "epoch": 1.0778496542493865, "grad_norm": 0.1587132066488266, "learning_rate": 1.4416069737962538e-05, "loss": 0.4554, "step": 4832 }, { "epoch": 1.078072719161276, "grad_norm": 0.3479141891002655, "learning_rate": 1.4413958098748171e-05, "loss": 0.4814, "step": 4833 }, { "epoch": 1.0782957840731653, "grad_norm": 0.15964049100875854, "learning_rate": 1.441184621506104e-05, "loss": 0.4694, "step": 4834 }, { "epoch": 1.0785188489850546, "grad_norm": 0.16396582126617432, "learning_rate": 1.440973408701811e-05, "loss": 0.4892, "step": 4835 }, { "epoch": 1.078741913896944, "grad_norm": 0.1593598872423172, "learning_rate": 1.4407621714736372e-05, "loss": 0.4485, "step": 4836 }, { "epoch": 1.0789649788088334, "grad_norm": 0.17762166261672974, "learning_rate": 1.4405509098332818e-05, "loss": 0.4358, "step": 4837 }, { "epoch": 1.0791880437207226, "grad_norm": 0.15934491157531738, "learning_rate": 1.4403396237924456e-05, "loss": 0.4734, "step": 4838 }, { "epoch": 1.0794111086326121, "grad_norm": 0.18883134424686432, "learning_rate": 1.4401283133628315e-05, "loss": 0.4488, "step": 4839 }, { "epoch": 1.0796341735445014, "grad_norm": 0.1616184413433075, "learning_rate": 1.4399169785561426e-05, "loss": 0.465, "step": 4840 }, { "epoch": 1.0798572384563907, "grad_norm": 0.16874286532402039, "learning_rate": 1.4397056193840842e-05, "loss": 0.5069, "step": 4841 }, { "epoch": 1.0800803033682802, "grad_norm": 0.18162856996059418, "learning_rate": 1.4394942358583627e-05, "loss": 0.4929, "step": 4842 }, { "epoch": 1.0803033682801695, "grad_norm": 0.15963618457317352, "learning_rate": 1.4392828279906866e-05, "loss": 0.46, "step": 4843 }, { "epoch": 1.080526433192059, "grad_norm": 0.17380237579345703, "learning_rate": 1.4390713957927636e-05, "loss": 0.4929, "step": 4844 }, { "epoch": 1.0807494981039483, "grad_norm": 0.174495667219162, "learning_rate": 1.4388599392763052e-05, "loss": 0.4626, "step": 4845 }, { "epoch": 1.0809725630158376, "grad_norm": 0.17126008868217468, "learning_rate": 1.4386484584530225e-05, "loss": 0.47, "step": 4846 }, { "epoch": 1.081195627927727, "grad_norm": 0.15579789876937866, "learning_rate": 1.4384369533346292e-05, "loss": 0.4565, "step": 4847 }, { "epoch": 1.0814186928396163, "grad_norm": 0.16992245614528656, "learning_rate": 1.4382254239328392e-05, "loss": 0.4646, "step": 4848 }, { "epoch": 1.0816417577515056, "grad_norm": 0.1682998389005661, "learning_rate": 1.4380138702593691e-05, "loss": 0.4807, "step": 4849 }, { "epoch": 1.0818648226633951, "grad_norm": 0.16254080832004547, "learning_rate": 1.4378022923259354e-05, "loss": 0.4604, "step": 4850 }, { "epoch": 1.0820878875752844, "grad_norm": 0.16904620826244354, "learning_rate": 1.4375906901442568e-05, "loss": 0.5068, "step": 4851 }, { "epoch": 1.0823109524871737, "grad_norm": 0.15982915461063385, "learning_rate": 1.4373790637260534e-05, "loss": 0.4757, "step": 4852 }, { "epoch": 1.0825340173990632, "grad_norm": 0.1598198115825653, "learning_rate": 1.4371674130830462e-05, "loss": 0.4596, "step": 4853 }, { "epoch": 1.0827570823109525, "grad_norm": 0.2856222689151764, "learning_rate": 1.4369557382269577e-05, "loss": 0.4618, "step": 4854 }, { "epoch": 1.0829801472228417, "grad_norm": 0.1609204113483429, "learning_rate": 1.4367440391695118e-05, "loss": 0.4814, "step": 4855 }, { "epoch": 1.0832032121347313, "grad_norm": 0.1572166234254837, "learning_rate": 1.4365323159224341e-05, "loss": 0.4768, "step": 4856 }, { "epoch": 1.0834262770466205, "grad_norm": 0.15718881785869598, "learning_rate": 1.4363205684974504e-05, "loss": 0.4594, "step": 4857 }, { "epoch": 1.0836493419585098, "grad_norm": 0.1571321189403534, "learning_rate": 1.4361087969062895e-05, "loss": 0.4705, "step": 4858 }, { "epoch": 1.0838724068703993, "grad_norm": 0.16006889939308167, "learning_rate": 1.4358970011606798e-05, "loss": 0.4549, "step": 4859 }, { "epoch": 1.0840954717822886, "grad_norm": 0.1665477454662323, "learning_rate": 1.4356851812723524e-05, "loss": 0.4657, "step": 4860 }, { "epoch": 1.084318536694178, "grad_norm": 0.16529056429862976, "learning_rate": 1.4354733372530391e-05, "loss": 0.476, "step": 4861 }, { "epoch": 1.0845416016060674, "grad_norm": 0.1675388067960739, "learning_rate": 1.4352614691144733e-05, "loss": 0.4721, "step": 4862 }, { "epoch": 1.0847646665179567, "grad_norm": 0.16424338519573212, "learning_rate": 1.4350495768683893e-05, "loss": 0.4856, "step": 4863 }, { "epoch": 1.0849877314298462, "grad_norm": 0.16360746324062347, "learning_rate": 1.4348376605265233e-05, "loss": 0.4916, "step": 4864 }, { "epoch": 1.0852107963417355, "grad_norm": 0.16066889464855194, "learning_rate": 1.4346257201006122e-05, "loss": 0.4606, "step": 4865 }, { "epoch": 1.0854338612536247, "grad_norm": 0.15210098028182983, "learning_rate": 1.434413755602395e-05, "loss": 0.4684, "step": 4866 }, { "epoch": 1.0856569261655142, "grad_norm": 0.16423378884792328, "learning_rate": 1.4342017670436113e-05, "loss": 0.4654, "step": 4867 }, { "epoch": 1.0858799910774035, "grad_norm": 0.191256582736969, "learning_rate": 1.4339897544360026e-05, "loss": 0.4729, "step": 4868 }, { "epoch": 1.0861030559892928, "grad_norm": 0.15907546877861023, "learning_rate": 1.4337777177913113e-05, "loss": 0.4545, "step": 4869 }, { "epoch": 1.0863261209011823, "grad_norm": 0.15440797805786133, "learning_rate": 1.4335656571212814e-05, "loss": 0.4816, "step": 4870 }, { "epoch": 1.0865491858130716, "grad_norm": 0.16571897268295288, "learning_rate": 1.4333535724376585e-05, "loss": 0.4498, "step": 4871 }, { "epoch": 1.0867722507249609, "grad_norm": 0.16138556599617004, "learning_rate": 1.4331414637521885e-05, "loss": 0.4541, "step": 4872 }, { "epoch": 1.0869953156368504, "grad_norm": 0.17464113235473633, "learning_rate": 1.4329293310766196e-05, "loss": 0.4997, "step": 4873 }, { "epoch": 1.0872183805487396, "grad_norm": 0.15958289802074432, "learning_rate": 1.4327171744227015e-05, "loss": 0.4351, "step": 4874 }, { "epoch": 1.087441445460629, "grad_norm": 0.17836567759513855, "learning_rate": 1.432504993802184e-05, "loss": 0.4626, "step": 4875 }, { "epoch": 1.0876645103725184, "grad_norm": 0.16179955005645752, "learning_rate": 1.4322927892268195e-05, "loss": 0.4931, "step": 4876 }, { "epoch": 1.0878875752844077, "grad_norm": 0.16194845736026764, "learning_rate": 1.4320805607083611e-05, "loss": 0.4619, "step": 4877 }, { "epoch": 1.0881106401962972, "grad_norm": 0.16176071763038635, "learning_rate": 1.4318683082585634e-05, "loss": 0.4798, "step": 4878 }, { "epoch": 1.0883337051081865, "grad_norm": 0.15933561325073242, "learning_rate": 1.4316560318891823e-05, "loss": 0.4635, "step": 4879 }, { "epoch": 1.0885567700200758, "grad_norm": 0.16447357833385468, "learning_rate": 1.431443731611975e-05, "loss": 0.4747, "step": 4880 }, { "epoch": 1.0887798349319653, "grad_norm": 0.1632988601922989, "learning_rate": 1.4312314074386998e-05, "loss": 0.489, "step": 4881 }, { "epoch": 1.0890028998438546, "grad_norm": 0.16720198094844818, "learning_rate": 1.4310190593811167e-05, "loss": 0.4814, "step": 4882 }, { "epoch": 1.0892259647557438, "grad_norm": 0.16806739568710327, "learning_rate": 1.4308066874509869e-05, "loss": 0.4566, "step": 4883 }, { "epoch": 1.0894490296676334, "grad_norm": 0.1616521179676056, "learning_rate": 1.430594291660073e-05, "loss": 0.4875, "step": 4884 }, { "epoch": 1.0896720945795226, "grad_norm": 0.15476809442043304, "learning_rate": 1.4303818720201386e-05, "loss": 0.4632, "step": 4885 }, { "epoch": 1.089895159491412, "grad_norm": 0.15796570479869843, "learning_rate": 1.4301694285429489e-05, "loss": 0.4558, "step": 4886 }, { "epoch": 1.0901182244033014, "grad_norm": 0.1628834307193756, "learning_rate": 1.4299569612402701e-05, "loss": 0.4578, "step": 4887 }, { "epoch": 1.0903412893151907, "grad_norm": 0.16493961215019226, "learning_rate": 1.4297444701238706e-05, "loss": 0.5188, "step": 4888 }, { "epoch": 1.09056435422708, "grad_norm": 0.17024171352386475, "learning_rate": 1.4295319552055191e-05, "loss": 0.4797, "step": 4889 }, { "epoch": 1.0907874191389695, "grad_norm": 0.16179481148719788, "learning_rate": 1.4293194164969859e-05, "loss": 0.4894, "step": 4890 }, { "epoch": 1.0910104840508588, "grad_norm": 0.15970705449581146, "learning_rate": 1.429106854010043e-05, "loss": 0.4698, "step": 4891 }, { "epoch": 1.091233548962748, "grad_norm": 0.15802475810050964, "learning_rate": 1.4288942677564634e-05, "loss": 0.4585, "step": 4892 }, { "epoch": 1.0914566138746375, "grad_norm": 0.16076651215553284, "learning_rate": 1.4286816577480211e-05, "loss": 0.4643, "step": 4893 }, { "epoch": 1.0916796787865268, "grad_norm": 0.1617365926504135, "learning_rate": 1.4284690239964925e-05, "loss": 0.4608, "step": 4894 }, { "epoch": 1.0919027436984163, "grad_norm": 0.1737692803144455, "learning_rate": 1.428256366513654e-05, "loss": 0.5016, "step": 4895 }, { "epoch": 1.0921258086103056, "grad_norm": 0.16984876990318298, "learning_rate": 1.428043685311284e-05, "loss": 0.476, "step": 4896 }, { "epoch": 1.092348873522195, "grad_norm": 0.16375143826007843, "learning_rate": 1.427830980401162e-05, "loss": 0.485, "step": 4897 }, { "epoch": 1.0925719384340844, "grad_norm": 0.16099539399147034, "learning_rate": 1.4276182517950696e-05, "loss": 0.4743, "step": 4898 }, { "epoch": 1.0927950033459737, "grad_norm": 0.16580551862716675, "learning_rate": 1.4274054995047884e-05, "loss": 0.473, "step": 4899 }, { "epoch": 1.093018068257863, "grad_norm": 0.15540550649166107, "learning_rate": 1.427192723542102e-05, "loss": 0.4353, "step": 4900 }, { "epoch": 1.0932411331697525, "grad_norm": 0.1589297354221344, "learning_rate": 1.4269799239187956e-05, "loss": 0.4637, "step": 4901 }, { "epoch": 1.0934641980816417, "grad_norm": 0.16535155475139618, "learning_rate": 1.4267671006466552e-05, "loss": 0.4492, "step": 4902 }, { "epoch": 1.093687262993531, "grad_norm": 0.15984977781772614, "learning_rate": 1.4265542537374684e-05, "loss": 0.4703, "step": 4903 }, { "epoch": 1.0939103279054205, "grad_norm": 0.15200082957744598, "learning_rate": 1.4263413832030237e-05, "loss": 0.4483, "step": 4904 }, { "epoch": 1.0941333928173098, "grad_norm": 0.21631982922554016, "learning_rate": 1.4261284890551115e-05, "loss": 0.4881, "step": 4905 }, { "epoch": 1.094356457729199, "grad_norm": 0.16515322029590607, "learning_rate": 1.4259155713055231e-05, "loss": 0.4788, "step": 4906 }, { "epoch": 1.0945795226410886, "grad_norm": 0.16507336497306824, "learning_rate": 1.4257026299660511e-05, "loss": 0.4749, "step": 4907 }, { "epoch": 1.0948025875529779, "grad_norm": 0.17329774796962738, "learning_rate": 1.4254896650484897e-05, "loss": 0.4708, "step": 4908 }, { "epoch": 1.0950256524648672, "grad_norm": 0.15935118496418, "learning_rate": 1.4252766765646344e-05, "loss": 0.4405, "step": 4909 }, { "epoch": 1.0952487173767567, "grad_norm": 0.16520404815673828, "learning_rate": 1.4250636645262813e-05, "loss": 0.4546, "step": 4910 }, { "epoch": 1.095471782288646, "grad_norm": 0.16655127704143524, "learning_rate": 1.424850628945229e-05, "loss": 0.4838, "step": 4911 }, { "epoch": 1.0956948472005354, "grad_norm": 0.17478324472904205, "learning_rate": 1.4246375698332764e-05, "loss": 0.4932, "step": 4912 }, { "epoch": 1.0959179121124247, "grad_norm": 0.15752072632312775, "learning_rate": 1.4244244872022244e-05, "loss": 0.4386, "step": 4913 }, { "epoch": 1.096140977024314, "grad_norm": 0.16019755601882935, "learning_rate": 1.424211381063874e-05, "loss": 0.4474, "step": 4914 }, { "epoch": 1.0963640419362035, "grad_norm": 0.1732664704322815, "learning_rate": 1.4239982514300294e-05, "loss": 0.4423, "step": 4915 }, { "epoch": 1.0965871068480928, "grad_norm": 0.16661664843559265, "learning_rate": 1.4237850983124943e-05, "loss": 0.5198, "step": 4916 }, { "epoch": 1.096810171759982, "grad_norm": 0.15834182500839233, "learning_rate": 1.4235719217230751e-05, "loss": 0.4784, "step": 4917 }, { "epoch": 1.0970332366718716, "grad_norm": 0.16510586440563202, "learning_rate": 1.423358721673578e-05, "loss": 0.4843, "step": 4918 }, { "epoch": 1.0972563015837609, "grad_norm": 0.16284050047397614, "learning_rate": 1.4231454981758122e-05, "loss": 0.4559, "step": 4919 }, { "epoch": 1.0974793664956501, "grad_norm": 0.1576804369688034, "learning_rate": 1.422932251241587e-05, "loss": 0.4855, "step": 4920 }, { "epoch": 1.0977024314075396, "grad_norm": 0.17963336408138275, "learning_rate": 1.4227189808827131e-05, "loss": 0.456, "step": 4921 }, { "epoch": 1.097925496319429, "grad_norm": 0.16884389519691467, "learning_rate": 1.4225056871110032e-05, "loss": 0.4513, "step": 4922 }, { "epoch": 1.0981485612313182, "grad_norm": 0.16066482663154602, "learning_rate": 1.422292369938271e-05, "loss": 0.4753, "step": 4923 }, { "epoch": 1.0983716261432077, "grad_norm": 0.16328173875808716, "learning_rate": 1.4220790293763307e-05, "loss": 0.4872, "step": 4924 }, { "epoch": 1.098594691055097, "grad_norm": 0.16611479222774506, "learning_rate": 1.4218656654369987e-05, "loss": 0.4599, "step": 4925 }, { "epoch": 1.0988177559669863, "grad_norm": 0.15916502475738525, "learning_rate": 1.4216522781320928e-05, "loss": 0.4709, "step": 4926 }, { "epoch": 1.0990408208788758, "grad_norm": 0.18872345983982086, "learning_rate": 1.4214388674734309e-05, "loss": 0.469, "step": 4927 }, { "epoch": 1.099263885790765, "grad_norm": 0.1624644547700882, "learning_rate": 1.421225433472834e-05, "loss": 0.4793, "step": 4928 }, { "epoch": 1.0994869507026546, "grad_norm": 0.16488322615623474, "learning_rate": 1.4210119761421228e-05, "loss": 0.4534, "step": 4929 }, { "epoch": 1.0997100156145438, "grad_norm": 0.15583010017871857, "learning_rate": 1.4207984954931204e-05, "loss": 0.4851, "step": 4930 }, { "epoch": 1.0999330805264331, "grad_norm": 0.1624946892261505, "learning_rate": 1.4205849915376501e-05, "loss": 0.4915, "step": 4931 }, { "epoch": 1.1001561454383226, "grad_norm": 0.15822146832942963, "learning_rate": 1.4203714642875377e-05, "loss": 0.4684, "step": 4932 }, { "epoch": 1.100379210350212, "grad_norm": 0.16068744659423828, "learning_rate": 1.420157913754609e-05, "loss": 0.4555, "step": 4933 }, { "epoch": 1.1006022752621012, "grad_norm": 0.18929767608642578, "learning_rate": 1.4199443399506922e-05, "loss": 0.4551, "step": 4934 }, { "epoch": 1.1008253401739907, "grad_norm": 0.1583186835050583, "learning_rate": 1.4197307428876164e-05, "loss": 0.4449, "step": 4935 }, { "epoch": 1.10104840508588, "grad_norm": 0.16161227226257324, "learning_rate": 1.4195171225772117e-05, "loss": 0.4557, "step": 4936 }, { "epoch": 1.1012714699977693, "grad_norm": 0.16375921666622162, "learning_rate": 1.4193034790313101e-05, "loss": 0.4627, "step": 4937 }, { "epoch": 1.1014945349096588, "grad_norm": 0.17420132458209991, "learning_rate": 1.4190898122617443e-05, "loss": 0.4736, "step": 4938 }, { "epoch": 1.101717599821548, "grad_norm": 0.16097956895828247, "learning_rate": 1.4188761222803482e-05, "loss": 0.4511, "step": 4939 }, { "epoch": 1.1019406647334375, "grad_norm": 0.16940614581108093, "learning_rate": 1.4186624090989578e-05, "loss": 0.4763, "step": 4940 }, { "epoch": 1.1021637296453268, "grad_norm": 0.1643071174621582, "learning_rate": 1.4184486727294098e-05, "loss": 0.4904, "step": 4941 }, { "epoch": 1.102386794557216, "grad_norm": 0.1605340987443924, "learning_rate": 1.418234913183542e-05, "loss": 0.4621, "step": 4942 }, { "epoch": 1.1026098594691056, "grad_norm": 0.15705011785030365, "learning_rate": 1.4180211304731941e-05, "loss": 0.4779, "step": 4943 }, { "epoch": 1.102832924380995, "grad_norm": 0.164706289768219, "learning_rate": 1.4178073246102062e-05, "loss": 0.4685, "step": 4944 }, { "epoch": 1.1030559892928842, "grad_norm": 0.1530647873878479, "learning_rate": 1.417593495606421e-05, "loss": 0.4643, "step": 4945 }, { "epoch": 1.1032790542047737, "grad_norm": 0.1623847335577011, "learning_rate": 1.4173796434736808e-05, "loss": 0.4606, "step": 4946 }, { "epoch": 1.103502119116663, "grad_norm": 0.17122423648834229, "learning_rate": 1.4171657682238309e-05, "loss": 0.4649, "step": 4947 }, { "epoch": 1.1037251840285522, "grad_norm": 0.15987126529216766, "learning_rate": 1.4169518698687164e-05, "loss": 0.469, "step": 4948 }, { "epoch": 1.1039482489404417, "grad_norm": 0.1600853055715561, "learning_rate": 1.416737948420185e-05, "loss": 0.4782, "step": 4949 }, { "epoch": 1.104171313852331, "grad_norm": 0.15356458723545074, "learning_rate": 1.4165240038900843e-05, "loss": 0.4474, "step": 4950 }, { "epoch": 1.1043943787642203, "grad_norm": 0.15699787437915802, "learning_rate": 1.4163100362902642e-05, "loss": 0.4525, "step": 4951 }, { "epoch": 1.1046174436761098, "grad_norm": 0.161276713013649, "learning_rate": 1.4160960456325757e-05, "loss": 0.4644, "step": 4952 }, { "epoch": 1.104840508587999, "grad_norm": 0.17264524102210999, "learning_rate": 1.4158820319288709e-05, "loss": 0.4881, "step": 4953 }, { "epoch": 1.1050635734998884, "grad_norm": 0.15942956507205963, "learning_rate": 1.4156679951910031e-05, "loss": 0.4431, "step": 4954 }, { "epoch": 1.1052866384117779, "grad_norm": 0.16725444793701172, "learning_rate": 1.415453935430827e-05, "loss": 0.5334, "step": 4955 }, { "epoch": 1.1055097033236672, "grad_norm": 0.15742219984531403, "learning_rate": 1.4152398526601987e-05, "loss": 0.4648, "step": 4956 }, { "epoch": 1.1057327682355567, "grad_norm": 0.16164308786392212, "learning_rate": 1.4150257468909753e-05, "loss": 0.4746, "step": 4957 }, { "epoch": 1.105955833147446, "grad_norm": 0.16626529395580292, "learning_rate": 1.4148116181350155e-05, "loss": 0.4763, "step": 4958 }, { "epoch": 1.1061788980593352, "grad_norm": 0.16360588371753693, "learning_rate": 1.4145974664041793e-05, "loss": 0.4983, "step": 4959 }, { "epoch": 1.1064019629712247, "grad_norm": 0.16253133118152618, "learning_rate": 1.4143832917103271e-05, "loss": 0.4742, "step": 4960 }, { "epoch": 1.106625027883114, "grad_norm": 0.16334842145442963, "learning_rate": 1.4141690940653217e-05, "loss": 0.4921, "step": 4961 }, { "epoch": 1.1068480927950033, "grad_norm": 0.1619054228067398, "learning_rate": 1.4139548734810267e-05, "loss": 0.485, "step": 4962 }, { "epoch": 1.1070711577068928, "grad_norm": 0.16352368891239166, "learning_rate": 1.4137406299693068e-05, "loss": 0.4702, "step": 4963 }, { "epoch": 1.107294222618782, "grad_norm": 0.16347824037075043, "learning_rate": 1.4135263635420287e-05, "loss": 0.48, "step": 4964 }, { "epoch": 1.1075172875306714, "grad_norm": 0.15994015336036682, "learning_rate": 1.4133120742110591e-05, "loss": 0.477, "step": 4965 }, { "epoch": 1.1077403524425609, "grad_norm": 0.172930508852005, "learning_rate": 1.4130977619882673e-05, "loss": 0.4882, "step": 4966 }, { "epoch": 1.1079634173544501, "grad_norm": 0.16305923461914062, "learning_rate": 1.4128834268855224e-05, "loss": 0.4737, "step": 4967 }, { "epoch": 1.1081864822663394, "grad_norm": 0.1601942926645279, "learning_rate": 1.4126690689146967e-05, "loss": 0.4876, "step": 4968 }, { "epoch": 1.108409547178229, "grad_norm": 0.16627256572246552, "learning_rate": 1.4124546880876617e-05, "loss": 0.4928, "step": 4969 }, { "epoch": 1.1086326120901182, "grad_norm": 0.16845837235450745, "learning_rate": 1.4122402844162921e-05, "loss": 0.4756, "step": 4970 }, { "epoch": 1.1088556770020075, "grad_norm": 0.1565047651529312, "learning_rate": 1.412025857912462e-05, "loss": 0.4661, "step": 4971 }, { "epoch": 1.109078741913897, "grad_norm": 0.15977446734905243, "learning_rate": 1.4118114085880484e-05, "loss": 0.4466, "step": 4972 }, { "epoch": 1.1093018068257863, "grad_norm": 0.16255097091197968, "learning_rate": 1.4115969364549288e-05, "loss": 0.4677, "step": 4973 }, { "epoch": 1.1095248717376758, "grad_norm": 0.15899838507175446, "learning_rate": 1.4113824415249812e-05, "loss": 0.4786, "step": 4974 }, { "epoch": 1.109747936649565, "grad_norm": 0.15630389750003815, "learning_rate": 1.4111679238100868e-05, "loss": 0.4657, "step": 4975 }, { "epoch": 1.1099710015614543, "grad_norm": 0.16901795566082, "learning_rate": 1.4109533833221263e-05, "loss": 0.4582, "step": 4976 }, { "epoch": 1.1101940664733438, "grad_norm": 0.15667371451854706, "learning_rate": 1.4107388200729824e-05, "loss": 0.4686, "step": 4977 }, { "epoch": 1.1104171313852331, "grad_norm": 0.1538134068250656, "learning_rate": 1.4105242340745388e-05, "loss": 0.4591, "step": 4978 }, { "epoch": 1.1106401962971224, "grad_norm": 0.15832480788230896, "learning_rate": 1.4103096253386812e-05, "loss": 0.4598, "step": 4979 }, { "epoch": 1.110863261209012, "grad_norm": 0.16176483035087585, "learning_rate": 1.4100949938772953e-05, "loss": 0.4959, "step": 4980 }, { "epoch": 1.1110863261209012, "grad_norm": 0.15992896258831024, "learning_rate": 1.4098803397022694e-05, "loss": 0.4693, "step": 4981 }, { "epoch": 1.1113093910327905, "grad_norm": 0.16486084461212158, "learning_rate": 1.4096656628254916e-05, "loss": 0.4687, "step": 4982 }, { "epoch": 1.11153245594468, "grad_norm": 0.16968096792697906, "learning_rate": 1.4094509632588528e-05, "loss": 0.4648, "step": 4983 }, { "epoch": 1.1117555208565693, "grad_norm": 0.1609363853931427, "learning_rate": 1.409236241014244e-05, "loss": 0.47, "step": 4984 }, { "epoch": 1.1119785857684585, "grad_norm": 0.16171255707740784, "learning_rate": 1.409021496103558e-05, "loss": 0.4538, "step": 4985 }, { "epoch": 1.112201650680348, "grad_norm": 0.1664259135723114, "learning_rate": 1.4088067285386885e-05, "loss": 0.4717, "step": 4986 }, { "epoch": 1.1124247155922373, "grad_norm": 0.16782160103321075, "learning_rate": 1.4085919383315311e-05, "loss": 0.472, "step": 4987 }, { "epoch": 1.1126477805041266, "grad_norm": 0.17342595756053925, "learning_rate": 1.408377125493982e-05, "loss": 0.5161, "step": 4988 }, { "epoch": 1.112870845416016, "grad_norm": 0.17580246925354004, "learning_rate": 1.408162290037939e-05, "loss": 0.4874, "step": 4989 }, { "epoch": 1.1130939103279054, "grad_norm": 0.16201059520244598, "learning_rate": 1.4079474319753007e-05, "loss": 0.4597, "step": 4990 }, { "epoch": 1.113316975239795, "grad_norm": 0.1731184720993042, "learning_rate": 1.4077325513179676e-05, "loss": 0.5251, "step": 4991 }, { "epoch": 1.1135400401516842, "grad_norm": 0.14824289083480835, "learning_rate": 1.407517648077841e-05, "loss": 0.4159, "step": 4992 }, { "epoch": 1.1137631050635735, "grad_norm": 0.1582590788602829, "learning_rate": 1.4073027222668236e-05, "loss": 0.4813, "step": 4993 }, { "epoch": 1.113986169975463, "grad_norm": 0.15586505830287933, "learning_rate": 1.4070877738968196e-05, "loss": 0.4634, "step": 4994 }, { "epoch": 1.1142092348873522, "grad_norm": 0.15916606783866882, "learning_rate": 1.4068728029797338e-05, "loss": 0.4608, "step": 4995 }, { "epoch": 1.1144322997992415, "grad_norm": 0.15859851241111755, "learning_rate": 1.4066578095274732e-05, "loss": 0.4701, "step": 4996 }, { "epoch": 1.114655364711131, "grad_norm": 0.1668933480978012, "learning_rate": 1.406442793551945e-05, "loss": 0.4823, "step": 4997 }, { "epoch": 1.1148784296230203, "grad_norm": 0.15839633345603943, "learning_rate": 1.406227755065058e-05, "loss": 0.4411, "step": 4998 }, { "epoch": 1.1151014945349096, "grad_norm": 0.16563774645328522, "learning_rate": 1.4060126940787228e-05, "loss": 0.5158, "step": 4999 }, { "epoch": 1.115324559446799, "grad_norm": 0.16948749125003815, "learning_rate": 1.4057976106048509e-05, "loss": 0.4735, "step": 5000 }, { "epoch": 1.1155476243586884, "grad_norm": 0.15439462661743164, "learning_rate": 1.4055825046553544e-05, "loss": 0.4646, "step": 5001 }, { "epoch": 1.1157706892705777, "grad_norm": 0.22990640997886658, "learning_rate": 1.4053673762421478e-05, "loss": 0.4645, "step": 5002 }, { "epoch": 1.1159937541824672, "grad_norm": 0.16069011390209198, "learning_rate": 1.4051522253771458e-05, "loss": 0.4598, "step": 5003 }, { "epoch": 1.1162168190943564, "grad_norm": 0.15775886178016663, "learning_rate": 1.4049370520722657e-05, "loss": 0.468, "step": 5004 }, { "epoch": 1.1164398840062457, "grad_norm": 0.16639457643032074, "learning_rate": 1.4047218563394238e-05, "loss": 0.5046, "step": 5005 }, { "epoch": 1.1166629489181352, "grad_norm": 0.16088278591632843, "learning_rate": 1.40450663819054e-05, "loss": 0.4801, "step": 5006 }, { "epoch": 1.1168860138300245, "grad_norm": 0.16275745630264282, "learning_rate": 1.404291397637534e-05, "loss": 0.4568, "step": 5007 }, { "epoch": 1.117109078741914, "grad_norm": 0.16248852014541626, "learning_rate": 1.4040761346923275e-05, "loss": 0.4617, "step": 5008 }, { "epoch": 1.1173321436538033, "grad_norm": 0.15882933139801025, "learning_rate": 1.4038608493668428e-05, "loss": 0.4553, "step": 5009 }, { "epoch": 1.1175552085656926, "grad_norm": 0.15445846319198608, "learning_rate": 1.4036455416730038e-05, "loss": 0.4623, "step": 5010 }, { "epoch": 1.117778273477582, "grad_norm": 0.16165289282798767, "learning_rate": 1.4034302116227358e-05, "loss": 0.4522, "step": 5011 }, { "epoch": 1.1180013383894714, "grad_norm": 0.16482336819171906, "learning_rate": 1.4032148592279649e-05, "loss": 0.4897, "step": 5012 }, { "epoch": 1.1182244033013606, "grad_norm": 0.16417334973812103, "learning_rate": 1.4029994845006187e-05, "loss": 0.459, "step": 5013 }, { "epoch": 1.1184474682132501, "grad_norm": 0.16407686471939087, "learning_rate": 1.4027840874526262e-05, "loss": 0.4656, "step": 5014 }, { "epoch": 1.1186705331251394, "grad_norm": 0.1576424241065979, "learning_rate": 1.4025686680959174e-05, "loss": 0.4284, "step": 5015 }, { "epoch": 1.1188935980370287, "grad_norm": 0.15783734619617462, "learning_rate": 1.4023532264424233e-05, "loss": 0.4609, "step": 5016 }, { "epoch": 1.1191166629489182, "grad_norm": 0.1539250910282135, "learning_rate": 1.4021377625040768e-05, "loss": 0.4507, "step": 5017 }, { "epoch": 1.1193397278608075, "grad_norm": 0.16160206496715546, "learning_rate": 1.4019222762928113e-05, "loss": 0.456, "step": 5018 }, { "epoch": 1.1195627927726968, "grad_norm": 0.15390300750732422, "learning_rate": 1.4017067678205623e-05, "loss": 0.4637, "step": 5019 }, { "epoch": 1.1197858576845863, "grad_norm": 0.15983295440673828, "learning_rate": 1.4014912370992653e-05, "loss": 0.4809, "step": 5020 }, { "epoch": 1.1200089225964756, "grad_norm": 0.16199488937854767, "learning_rate": 1.4012756841408583e-05, "loss": 0.4433, "step": 5021 }, { "epoch": 1.1202319875083648, "grad_norm": 0.1602371335029602, "learning_rate": 1.4010601089572794e-05, "loss": 0.4472, "step": 5022 }, { "epoch": 1.1204550524202543, "grad_norm": 0.16461622714996338, "learning_rate": 1.4008445115604694e-05, "loss": 0.4709, "step": 5023 }, { "epoch": 1.1206781173321436, "grad_norm": 0.16210167109966278, "learning_rate": 1.4006288919623687e-05, "loss": 0.4569, "step": 5024 }, { "epoch": 1.1209011822440331, "grad_norm": 0.15823470056056976, "learning_rate": 1.4004132501749198e-05, "loss": 0.4794, "step": 5025 }, { "epoch": 1.1211242471559224, "grad_norm": 0.155581995844841, "learning_rate": 1.4001975862100668e-05, "loss": 0.4442, "step": 5026 }, { "epoch": 1.1213473120678117, "grad_norm": 0.1608058661222458, "learning_rate": 1.3999819000797539e-05, "loss": 0.4535, "step": 5027 }, { "epoch": 1.1215703769797012, "grad_norm": 0.16367916762828827, "learning_rate": 1.3997661917959273e-05, "loss": 0.4581, "step": 5028 }, { "epoch": 1.1217934418915905, "grad_norm": 0.1618059128522873, "learning_rate": 1.3995504613705344e-05, "loss": 0.464, "step": 5029 }, { "epoch": 1.1220165068034798, "grad_norm": 0.15621179342269897, "learning_rate": 1.3993347088155237e-05, "loss": 0.4573, "step": 5030 }, { "epoch": 1.1222395717153693, "grad_norm": 0.1720859855413437, "learning_rate": 1.399118934142845e-05, "loss": 0.4854, "step": 5031 }, { "epoch": 1.1224626366272585, "grad_norm": 0.16994187235832214, "learning_rate": 1.3989031373644491e-05, "loss": 0.5074, "step": 5032 }, { "epoch": 1.1226857015391478, "grad_norm": 0.20645354688167572, "learning_rate": 1.3986873184922882e-05, "loss": 0.4498, "step": 5033 }, { "epoch": 1.1229087664510373, "grad_norm": 0.1670864075422287, "learning_rate": 1.3984714775383159e-05, "loss": 0.4719, "step": 5034 }, { "epoch": 1.1231318313629266, "grad_norm": 0.1660386323928833, "learning_rate": 1.3982556145144866e-05, "loss": 0.4869, "step": 5035 }, { "epoch": 1.1233548962748159, "grad_norm": 0.16415423154830933, "learning_rate": 1.3980397294327563e-05, "loss": 0.4806, "step": 5036 }, { "epoch": 1.1235779611867054, "grad_norm": 0.16121268272399902, "learning_rate": 1.3978238223050817e-05, "loss": 0.4677, "step": 5037 }, { "epoch": 1.1238010260985947, "grad_norm": 0.16271083056926727, "learning_rate": 1.3976078931434219e-05, "loss": 0.4749, "step": 5038 }, { "epoch": 1.124024091010484, "grad_norm": 0.16719701886177063, "learning_rate": 1.3973919419597354e-05, "loss": 0.4537, "step": 5039 }, { "epoch": 1.1242471559223735, "grad_norm": 0.15598838031291962, "learning_rate": 1.3971759687659841e-05, "loss": 0.4623, "step": 5040 }, { "epoch": 1.1244702208342627, "grad_norm": 0.1647387146949768, "learning_rate": 1.3969599735741288e-05, "loss": 0.4879, "step": 5041 }, { "epoch": 1.1246932857461522, "grad_norm": 0.15755842626094818, "learning_rate": 1.3967439563961334e-05, "loss": 0.4564, "step": 5042 }, { "epoch": 1.1249163506580415, "grad_norm": 0.1570240706205368, "learning_rate": 1.396527917243962e-05, "loss": 0.4593, "step": 5043 }, { "epoch": 1.1251394155699308, "grad_norm": 0.6927107572555542, "learning_rate": 1.3963118561295803e-05, "loss": 0.4905, "step": 5044 }, { "epoch": 1.1253624804818203, "grad_norm": 0.17146988213062286, "learning_rate": 1.3960957730649551e-05, "loss": 0.4754, "step": 5045 }, { "epoch": 1.1255855453937096, "grad_norm": 0.16707734763622284, "learning_rate": 1.3958796680620545e-05, "loss": 0.4578, "step": 5046 }, { "epoch": 1.1258086103055989, "grad_norm": 0.15594236552715302, "learning_rate": 1.3956635411328478e-05, "loss": 0.4556, "step": 5047 }, { "epoch": 1.1260316752174884, "grad_norm": 0.15944088995456696, "learning_rate": 1.395447392289305e-05, "loss": 0.4478, "step": 5048 }, { "epoch": 1.1262547401293777, "grad_norm": 0.16137385368347168, "learning_rate": 1.3952312215433987e-05, "loss": 0.4922, "step": 5049 }, { "epoch": 1.126477805041267, "grad_norm": 0.18752697110176086, "learning_rate": 1.3950150289071007e-05, "loss": 0.4641, "step": 5050 }, { "epoch": 1.1267008699531564, "grad_norm": 0.16448596119880676, "learning_rate": 1.394798814392386e-05, "loss": 0.475, "step": 5051 }, { "epoch": 1.1269239348650457, "grad_norm": 0.16754111647605896, "learning_rate": 1.3945825780112294e-05, "loss": 0.4709, "step": 5052 }, { "epoch": 1.127146999776935, "grad_norm": 0.165597066283226, "learning_rate": 1.394366319775608e-05, "loss": 0.4761, "step": 5053 }, { "epoch": 1.1273700646888245, "grad_norm": 0.16928669810295105, "learning_rate": 1.3941500396974984e-05, "loss": 0.5111, "step": 5054 }, { "epoch": 1.1275931296007138, "grad_norm": 0.16309335827827454, "learning_rate": 1.3939337377888808e-05, "loss": 0.4638, "step": 5055 }, { "epoch": 1.127816194512603, "grad_norm": 0.16362158954143524, "learning_rate": 1.3937174140617349e-05, "loss": 0.4496, "step": 5056 }, { "epoch": 1.1280392594244926, "grad_norm": 0.15970171988010406, "learning_rate": 1.3935010685280417e-05, "loss": 0.4583, "step": 5057 }, { "epoch": 1.1282623243363818, "grad_norm": 0.16845880448818207, "learning_rate": 1.3932847011997846e-05, "loss": 0.4705, "step": 5058 }, { "epoch": 1.1284853892482714, "grad_norm": 0.1621864140033722, "learning_rate": 1.3930683120889463e-05, "loss": 0.4659, "step": 5059 }, { "epoch": 1.1287084541601606, "grad_norm": 0.16252627968788147, "learning_rate": 1.3928519012075128e-05, "loss": 0.4628, "step": 5060 }, { "epoch": 1.12893151907205, "grad_norm": 0.1627832055091858, "learning_rate": 1.3926354685674697e-05, "loss": 0.4617, "step": 5061 }, { "epoch": 1.1291545839839394, "grad_norm": 0.1639028936624527, "learning_rate": 1.3924190141808048e-05, "loss": 0.4626, "step": 5062 }, { "epoch": 1.1293776488958287, "grad_norm": 0.1629188507795334, "learning_rate": 1.3922025380595061e-05, "loss": 0.4905, "step": 5063 }, { "epoch": 1.129600713807718, "grad_norm": 0.16841430962085724, "learning_rate": 1.391986040215564e-05, "loss": 0.4512, "step": 5064 }, { "epoch": 1.1298237787196075, "grad_norm": 0.1598411500453949, "learning_rate": 1.3917695206609693e-05, "loss": 0.4453, "step": 5065 }, { "epoch": 1.1300468436314968, "grad_norm": 0.15308506786823273, "learning_rate": 1.3915529794077142e-05, "loss": 0.4563, "step": 5066 }, { "epoch": 1.130269908543386, "grad_norm": 0.16865313053131104, "learning_rate": 1.3913364164677922e-05, "loss": 0.4587, "step": 5067 }, { "epoch": 1.1304929734552756, "grad_norm": 0.1975971907377243, "learning_rate": 1.3911198318531977e-05, "loss": 0.4523, "step": 5068 }, { "epoch": 1.1307160383671648, "grad_norm": 0.1655057817697525, "learning_rate": 1.3909032255759267e-05, "loss": 0.4446, "step": 5069 }, { "epoch": 1.1309391032790541, "grad_norm": 0.16097813844680786, "learning_rate": 1.3906865976479766e-05, "loss": 0.4828, "step": 5070 }, { "epoch": 1.1311621681909436, "grad_norm": 0.15927664935588837, "learning_rate": 1.3904699480813446e-05, "loss": 0.4715, "step": 5071 }, { "epoch": 1.131385233102833, "grad_norm": 0.15957841277122498, "learning_rate": 1.3902532768880313e-05, "loss": 0.4638, "step": 5072 }, { "epoch": 1.1316082980147222, "grad_norm": 0.16548919677734375, "learning_rate": 1.3900365840800363e-05, "loss": 0.495, "step": 5073 }, { "epoch": 1.1318313629266117, "grad_norm": 0.1837669163942337, "learning_rate": 1.3898198696693621e-05, "loss": 0.4866, "step": 5074 }, { "epoch": 1.132054427838501, "grad_norm": 0.16433456540107727, "learning_rate": 1.3896031336680111e-05, "loss": 0.4637, "step": 5075 }, { "epoch": 1.1322774927503905, "grad_norm": 0.15830080211162567, "learning_rate": 1.3893863760879882e-05, "loss": 0.4644, "step": 5076 }, { "epoch": 1.1325005576622797, "grad_norm": 0.16347894072532654, "learning_rate": 1.3891695969412982e-05, "loss": 0.4764, "step": 5077 }, { "epoch": 1.132723622574169, "grad_norm": 0.15911467373371124, "learning_rate": 1.388952796239948e-05, "loss": 0.4585, "step": 5078 }, { "epoch": 1.1329466874860585, "grad_norm": 0.16895873844623566, "learning_rate": 1.3887359739959455e-05, "loss": 0.4787, "step": 5079 }, { "epoch": 1.1331697523979478, "grad_norm": 0.16661864519119263, "learning_rate": 1.3885191302212993e-05, "loss": 0.4987, "step": 5080 }, { "epoch": 1.133392817309837, "grad_norm": 0.16188743710517883, "learning_rate": 1.38830226492802e-05, "loss": 0.4428, "step": 5081 }, { "epoch": 1.1336158822217266, "grad_norm": 0.1691635102033615, "learning_rate": 1.3880853781281187e-05, "loss": 0.4713, "step": 5082 }, { "epoch": 1.1338389471336159, "grad_norm": 0.16617491841316223, "learning_rate": 1.387868469833608e-05, "loss": 0.4687, "step": 5083 }, { "epoch": 1.1340620120455052, "grad_norm": 0.17212171852588654, "learning_rate": 1.3876515400565016e-05, "loss": 0.4912, "step": 5084 }, { "epoch": 1.1342850769573947, "grad_norm": 0.1617995649576187, "learning_rate": 1.3874345888088145e-05, "loss": 0.4807, "step": 5085 }, { "epoch": 1.134508141869284, "grad_norm": 0.16481629014015198, "learning_rate": 1.3872176161025627e-05, "loss": 0.4652, "step": 5086 }, { "epoch": 1.1347312067811732, "grad_norm": 0.16548612713813782, "learning_rate": 1.3870006219497642e-05, "loss": 0.4517, "step": 5087 }, { "epoch": 1.1349542716930627, "grad_norm": 0.17424030601978302, "learning_rate": 1.3867836063624363e-05, "loss": 0.4804, "step": 5088 }, { "epoch": 1.135177336604952, "grad_norm": 0.17292533814907074, "learning_rate": 1.3865665693525994e-05, "loss": 0.4776, "step": 5089 }, { "epoch": 1.1354004015168413, "grad_norm": 0.1664893478155136, "learning_rate": 1.3863495109322744e-05, "loss": 0.4653, "step": 5090 }, { "epoch": 1.1356234664287308, "grad_norm": 0.16548456251621246, "learning_rate": 1.3861324311134832e-05, "loss": 0.4709, "step": 5091 }, { "epoch": 1.13584653134062, "grad_norm": 0.1643359214067459, "learning_rate": 1.3859153299082493e-05, "loss": 0.4361, "step": 5092 }, { "epoch": 1.1360695962525096, "grad_norm": 0.17173811793327332, "learning_rate": 1.3856982073285965e-05, "loss": 0.4482, "step": 5093 }, { "epoch": 1.1362926611643989, "grad_norm": 0.1580163687467575, "learning_rate": 1.3854810633865512e-05, "loss": 0.4395, "step": 5094 }, { "epoch": 1.1365157260762881, "grad_norm": 0.16304674744606018, "learning_rate": 1.3852638980941398e-05, "loss": 0.4754, "step": 5095 }, { "epoch": 1.1367387909881776, "grad_norm": 0.16688407957553864, "learning_rate": 1.38504671146339e-05, "loss": 0.5081, "step": 5096 }, { "epoch": 1.136961855900067, "grad_norm": 0.16496771574020386, "learning_rate": 1.3848295035063317e-05, "loss": 0.4537, "step": 5097 }, { "epoch": 1.1371849208119562, "grad_norm": 0.16779771447181702, "learning_rate": 1.3846122742349946e-05, "loss": 0.4608, "step": 5098 }, { "epoch": 1.1374079857238457, "grad_norm": 0.1593242734670639, "learning_rate": 1.3843950236614103e-05, "loss": 0.4761, "step": 5099 }, { "epoch": 1.137631050635735, "grad_norm": 0.16632351279258728, "learning_rate": 1.384177751797612e-05, "loss": 0.4723, "step": 5100 }, { "epoch": 1.1378541155476243, "grad_norm": 0.16760534048080444, "learning_rate": 1.383960458655633e-05, "loss": 0.4509, "step": 5101 }, { "epoch": 1.1380771804595138, "grad_norm": 0.16486233472824097, "learning_rate": 1.3837431442475089e-05, "loss": 0.4806, "step": 5102 }, { "epoch": 1.138300245371403, "grad_norm": 0.1571461409330368, "learning_rate": 1.3835258085852752e-05, "loss": 0.4564, "step": 5103 }, { "epoch": 1.1385233102832923, "grad_norm": 0.16537711024284363, "learning_rate": 1.38330845168097e-05, "loss": 0.4556, "step": 5104 }, { "epoch": 1.1387463751951818, "grad_norm": 0.1652860939502716, "learning_rate": 1.3830910735466313e-05, "loss": 0.447, "step": 5105 }, { "epoch": 1.1389694401070711, "grad_norm": 0.15692059695720673, "learning_rate": 1.3828736741942998e-05, "loss": 0.4566, "step": 5106 }, { "epoch": 1.1391925050189604, "grad_norm": 0.16122755408287048, "learning_rate": 1.3826562536360155e-05, "loss": 0.4692, "step": 5107 }, { "epoch": 1.13941556993085, "grad_norm": 0.1586706042289734, "learning_rate": 1.382438811883821e-05, "loss": 0.4316, "step": 5108 }, { "epoch": 1.1396386348427392, "grad_norm": 0.16200071573257446, "learning_rate": 1.3822213489497594e-05, "loss": 0.4704, "step": 5109 }, { "epoch": 1.1398616997546287, "grad_norm": 0.5188888311386108, "learning_rate": 1.3820038648458748e-05, "loss": 0.49, "step": 5110 }, { "epoch": 1.140084764666518, "grad_norm": 0.15934044122695923, "learning_rate": 1.3817863595842138e-05, "loss": 0.4456, "step": 5111 }, { "epoch": 1.1403078295784073, "grad_norm": 0.17257168889045715, "learning_rate": 1.3815688331768224e-05, "loss": 0.4655, "step": 5112 }, { "epoch": 1.1405308944902968, "grad_norm": 0.16849681735038757, "learning_rate": 1.3813512856357491e-05, "loss": 0.4461, "step": 5113 }, { "epoch": 1.140753959402186, "grad_norm": 0.16747885942459106, "learning_rate": 1.3811337169730428e-05, "loss": 0.473, "step": 5114 }, { "epoch": 1.1409770243140753, "grad_norm": 0.1711420714855194, "learning_rate": 1.3809161272007536e-05, "loss": 0.4851, "step": 5115 }, { "epoch": 1.1412000892259648, "grad_norm": 0.1751970797777176, "learning_rate": 1.3806985163309334e-05, "loss": 0.4955, "step": 5116 }, { "epoch": 1.1414231541378541, "grad_norm": 0.16653122007846832, "learning_rate": 1.3804808843756348e-05, "loss": 0.4697, "step": 5117 }, { "epoch": 1.1416462190497434, "grad_norm": 0.158418208360672, "learning_rate": 1.3802632313469111e-05, "loss": 0.433, "step": 5118 }, { "epoch": 1.141869283961633, "grad_norm": 0.16599524021148682, "learning_rate": 1.3800455572568182e-05, "loss": 0.49, "step": 5119 }, { "epoch": 1.1420923488735222, "grad_norm": 0.17085126042366028, "learning_rate": 1.3798278621174113e-05, "loss": 0.4914, "step": 5120 }, { "epoch": 1.1423154137854115, "grad_norm": 0.16169953346252441, "learning_rate": 1.3796101459407485e-05, "loss": 0.4851, "step": 5121 }, { "epoch": 1.142538478697301, "grad_norm": 0.19225676357746124, "learning_rate": 1.3793924087388876e-05, "loss": 0.4554, "step": 5122 }, { "epoch": 1.1427615436091902, "grad_norm": 0.15610942244529724, "learning_rate": 1.379174650523889e-05, "loss": 0.4436, "step": 5123 }, { "epoch": 1.1429846085210795, "grad_norm": 0.16388073563575745, "learning_rate": 1.3789568713078129e-05, "loss": 0.4647, "step": 5124 }, { "epoch": 1.143207673432969, "grad_norm": 0.15931613743305206, "learning_rate": 1.3787390711027217e-05, "loss": 0.4707, "step": 5125 }, { "epoch": 1.1434307383448583, "grad_norm": 0.1648622304201126, "learning_rate": 1.3785212499206783e-05, "loss": 0.4628, "step": 5126 }, { "epoch": 1.1436538032567478, "grad_norm": 0.16240182518959045, "learning_rate": 1.3783034077737472e-05, "loss": 0.4735, "step": 5127 }, { "epoch": 1.143876868168637, "grad_norm": 0.15895162522792816, "learning_rate": 1.3780855446739937e-05, "loss": 0.4587, "step": 5128 }, { "epoch": 1.1440999330805264, "grad_norm": 0.1622496396303177, "learning_rate": 1.3778676606334844e-05, "loss": 0.4541, "step": 5129 }, { "epoch": 1.1443229979924159, "grad_norm": 0.15852504968643188, "learning_rate": 1.3776497556642874e-05, "loss": 0.4546, "step": 5130 }, { "epoch": 1.1445460629043052, "grad_norm": 0.16722345352172852, "learning_rate": 1.377431829778471e-05, "loss": 0.4754, "step": 5131 }, { "epoch": 1.1447691278161944, "grad_norm": 0.1600501835346222, "learning_rate": 1.377213882988106e-05, "loss": 0.4851, "step": 5132 }, { "epoch": 1.144992192728084, "grad_norm": 0.1717759370803833, "learning_rate": 1.3769959153052634e-05, "loss": 0.4581, "step": 5133 }, { "epoch": 1.1452152576399732, "grad_norm": 0.1694609820842743, "learning_rate": 1.3767779267420158e-05, "loss": 0.4584, "step": 5134 }, { "epoch": 1.1454383225518625, "grad_norm": 0.16004528105258942, "learning_rate": 1.3765599173104362e-05, "loss": 0.4768, "step": 5135 }, { "epoch": 1.145661387463752, "grad_norm": 0.15713921189308167, "learning_rate": 1.3763418870225999e-05, "loss": 0.4515, "step": 5136 }, { "epoch": 1.1458844523756413, "grad_norm": 0.16554328799247742, "learning_rate": 1.3761238358905826e-05, "loss": 0.4856, "step": 5137 }, { "epoch": 1.1461075172875306, "grad_norm": 0.1595989465713501, "learning_rate": 1.3759057639264614e-05, "loss": 0.4695, "step": 5138 }, { "epoch": 1.14633058219942, "grad_norm": 0.16599664092063904, "learning_rate": 1.3756876711423143e-05, "loss": 0.4821, "step": 5139 }, { "epoch": 1.1465536471113094, "grad_norm": 0.15603865683078766, "learning_rate": 1.3754695575502211e-05, "loss": 0.4469, "step": 5140 }, { "epoch": 1.1467767120231986, "grad_norm": 0.16003453731536865, "learning_rate": 1.3752514231622617e-05, "loss": 0.4619, "step": 5141 }, { "epoch": 1.1469997769350881, "grad_norm": 0.15717166662216187, "learning_rate": 1.375033267990518e-05, "loss": 0.4585, "step": 5142 }, { "epoch": 1.1472228418469774, "grad_norm": 0.15859000384807587, "learning_rate": 1.374815092047073e-05, "loss": 0.4446, "step": 5143 }, { "epoch": 1.147445906758867, "grad_norm": 0.16028450429439545, "learning_rate": 1.3745968953440105e-05, "loss": 0.4512, "step": 5144 }, { "epoch": 1.1476689716707562, "grad_norm": 0.1540936380624771, "learning_rate": 1.3743786778934158e-05, "loss": 0.4286, "step": 5145 }, { "epoch": 1.1478920365826455, "grad_norm": 0.15674646198749542, "learning_rate": 1.3741604397073748e-05, "loss": 0.469, "step": 5146 }, { "epoch": 1.148115101494535, "grad_norm": 0.16240215301513672, "learning_rate": 1.3739421807979753e-05, "loss": 0.4609, "step": 5147 }, { "epoch": 1.1483381664064243, "grad_norm": 0.1656564623117447, "learning_rate": 1.3737239011773054e-05, "loss": 0.4829, "step": 5148 }, { "epoch": 1.1485612313183136, "grad_norm": 0.16485963761806488, "learning_rate": 1.3735056008574551e-05, "loss": 0.4494, "step": 5149 }, { "epoch": 1.148784296230203, "grad_norm": 0.16284818947315216, "learning_rate": 1.3732872798505153e-05, "loss": 0.4504, "step": 5150 }, { "epoch": 1.1490073611420923, "grad_norm": 0.21305973827838898, "learning_rate": 1.373068938168578e-05, "loss": 0.4496, "step": 5151 }, { "epoch": 1.1492304260539816, "grad_norm": 0.16297850012779236, "learning_rate": 1.3728505758237358e-05, "loss": 0.4799, "step": 5152 }, { "epoch": 1.1494534909658711, "grad_norm": 0.16585685312747955, "learning_rate": 1.3726321928280837e-05, "loss": 0.4608, "step": 5153 }, { "epoch": 1.1496765558777604, "grad_norm": 0.16437405347824097, "learning_rate": 1.3724137891937167e-05, "loss": 0.4812, "step": 5154 }, { "epoch": 1.14989962078965, "grad_norm": 0.15935295820236206, "learning_rate": 1.3721953649327316e-05, "loss": 0.4633, "step": 5155 }, { "epoch": 1.1501226857015392, "grad_norm": 0.15900611877441406, "learning_rate": 1.3719769200572258e-05, "loss": 0.4401, "step": 5156 }, { "epoch": 1.1503457506134285, "grad_norm": 0.172005295753479, "learning_rate": 1.3717584545792983e-05, "loss": 0.4634, "step": 5157 }, { "epoch": 1.1505688155253178, "grad_norm": 0.16333134472370148, "learning_rate": 1.3715399685110492e-05, "loss": 0.4391, "step": 5158 }, { "epoch": 1.1507918804372073, "grad_norm": 0.16098088026046753, "learning_rate": 1.3713214618645796e-05, "loss": 0.4484, "step": 5159 }, { "epoch": 1.1510149453490965, "grad_norm": 0.17146125435829163, "learning_rate": 1.3711029346519917e-05, "loss": 0.4606, "step": 5160 }, { "epoch": 1.151238010260986, "grad_norm": 0.16574114561080933, "learning_rate": 1.3708843868853889e-05, "loss": 0.4862, "step": 5161 }, { "epoch": 1.1514610751728753, "grad_norm": 0.168543741106987, "learning_rate": 1.370665818576876e-05, "loss": 0.4608, "step": 5162 }, { "epoch": 1.1516841400847646, "grad_norm": 0.16033907234668732, "learning_rate": 1.3704472297385583e-05, "loss": 0.439, "step": 5163 }, { "epoch": 1.151907204996654, "grad_norm": 0.16206419467926025, "learning_rate": 1.3702286203825429e-05, "loss": 0.4707, "step": 5164 }, { "epoch": 1.1521302699085434, "grad_norm": 0.160027876496315, "learning_rate": 1.3700099905209374e-05, "loss": 0.4603, "step": 5165 }, { "epoch": 1.1523533348204327, "grad_norm": 0.17743688821792603, "learning_rate": 1.3697913401658516e-05, "loss": 0.5053, "step": 5166 }, { "epoch": 1.1525763997323222, "grad_norm": 0.15926334261894226, "learning_rate": 1.3695726693293951e-05, "loss": 0.4685, "step": 5167 }, { "epoch": 1.1527994646442115, "grad_norm": 0.18017716705799103, "learning_rate": 1.3693539780236798e-05, "loss": 0.4864, "step": 5168 }, { "epoch": 1.1530225295561007, "grad_norm": 0.1655382215976715, "learning_rate": 1.3691352662608175e-05, "loss": 0.4607, "step": 5169 }, { "epoch": 1.1532455944679902, "grad_norm": 0.16363778710365295, "learning_rate": 1.3689165340529222e-05, "loss": 0.4425, "step": 5170 }, { "epoch": 1.1534686593798795, "grad_norm": 0.1638006567955017, "learning_rate": 1.3686977814121087e-05, "loss": 0.4428, "step": 5171 }, { "epoch": 1.153691724291769, "grad_norm": 0.1594667136669159, "learning_rate": 1.368479008350493e-05, "loss": 0.4417, "step": 5172 }, { "epoch": 1.1539147892036583, "grad_norm": 0.1636561006307602, "learning_rate": 1.3682602148801917e-05, "loss": 0.4602, "step": 5173 }, { "epoch": 1.1541378541155476, "grad_norm": 0.1705152690410614, "learning_rate": 1.3680414010133237e-05, "loss": 0.4494, "step": 5174 }, { "epoch": 1.1543609190274369, "grad_norm": 0.1863928884267807, "learning_rate": 1.3678225667620075e-05, "loss": 0.4592, "step": 5175 }, { "epoch": 1.1545839839393264, "grad_norm": 0.1695355474948883, "learning_rate": 1.3676037121383638e-05, "loss": 0.5064, "step": 5176 }, { "epoch": 1.1548070488512157, "grad_norm": 0.16180136799812317, "learning_rate": 1.3673848371545145e-05, "loss": 0.473, "step": 5177 }, { "epoch": 1.1550301137631052, "grad_norm": 0.18443214893341064, "learning_rate": 1.3671659418225815e-05, "loss": 0.4872, "step": 5178 }, { "epoch": 1.1552531786749944, "grad_norm": 0.16797620058059692, "learning_rate": 1.3669470261546896e-05, "loss": 0.4538, "step": 5179 }, { "epoch": 1.1554762435868837, "grad_norm": 0.16609326004981995, "learning_rate": 1.3667280901629627e-05, "loss": 0.5077, "step": 5180 }, { "epoch": 1.1556993084987732, "grad_norm": 0.16129672527313232, "learning_rate": 1.3665091338595277e-05, "loss": 0.4544, "step": 5181 }, { "epoch": 1.1559223734106625, "grad_norm": 0.16736097633838654, "learning_rate": 1.3662901572565114e-05, "loss": 0.4633, "step": 5182 }, { "epoch": 1.1561454383225518, "grad_norm": 0.21419832110404968, "learning_rate": 1.3660711603660422e-05, "loss": 0.4508, "step": 5183 }, { "epoch": 1.1563685032344413, "grad_norm": 0.16937695443630219, "learning_rate": 1.3658521432002494e-05, "loss": 0.4729, "step": 5184 }, { "epoch": 1.1565915681463306, "grad_norm": 0.15548932552337646, "learning_rate": 1.3656331057712637e-05, "loss": 0.4829, "step": 5185 }, { "epoch": 1.1568146330582199, "grad_norm": 0.17126090824604034, "learning_rate": 1.3654140480912164e-05, "loss": 0.4715, "step": 5186 }, { "epoch": 1.1570376979701094, "grad_norm": 0.18009397387504578, "learning_rate": 1.3651949701722407e-05, "loss": 0.4883, "step": 5187 }, { "epoch": 1.1572607628819986, "grad_norm": 0.171475350856781, "learning_rate": 1.3649758720264705e-05, "loss": 0.4411, "step": 5188 }, { "epoch": 1.1574838277938881, "grad_norm": 0.17869600653648376, "learning_rate": 1.3647567536660407e-05, "loss": 0.4841, "step": 5189 }, { "epoch": 1.1577068927057774, "grad_norm": 0.1822163164615631, "learning_rate": 1.3645376151030871e-05, "loss": 0.4595, "step": 5190 }, { "epoch": 1.1579299576176667, "grad_norm": 0.17729108035564423, "learning_rate": 1.3643184563497479e-05, "loss": 0.4605, "step": 5191 }, { "epoch": 1.158153022529556, "grad_norm": 0.16725416481494904, "learning_rate": 1.3640992774181605e-05, "loss": 0.4375, "step": 5192 }, { "epoch": 1.1583760874414455, "grad_norm": 0.17714278399944305, "learning_rate": 1.3638800783204653e-05, "loss": 0.4944, "step": 5193 }, { "epoch": 1.1585991523533348, "grad_norm": 0.159885436296463, "learning_rate": 1.3636608590688019e-05, "loss": 0.4793, "step": 5194 }, { "epoch": 1.1588222172652243, "grad_norm": 0.16276371479034424, "learning_rate": 1.363441619675313e-05, "loss": 0.4872, "step": 5195 }, { "epoch": 1.1590452821771136, "grad_norm": 0.16415032744407654, "learning_rate": 1.3632223601521409e-05, "loss": 0.4632, "step": 5196 }, { "epoch": 1.1592683470890028, "grad_norm": 0.1709449738264084, "learning_rate": 1.3630030805114297e-05, "loss": 0.485, "step": 5197 }, { "epoch": 1.1594914120008923, "grad_norm": 0.16452427208423615, "learning_rate": 1.3627837807653249e-05, "loss": 0.4306, "step": 5198 }, { "epoch": 1.1597144769127816, "grad_norm": 0.18321935832500458, "learning_rate": 1.3625644609259716e-05, "loss": 0.4982, "step": 5199 }, { "epoch": 1.159937541824671, "grad_norm": 0.1770065277814865, "learning_rate": 1.3623451210055186e-05, "loss": 0.496, "step": 5200 }, { "epoch": 1.1601606067365604, "grad_norm": 0.16729894280433655, "learning_rate": 1.3621257610161129e-05, "loss": 0.4844, "step": 5201 }, { "epoch": 1.1603836716484497, "grad_norm": 0.1584504395723343, "learning_rate": 1.3619063809699054e-05, "loss": 0.4801, "step": 5202 }, { "epoch": 1.160606736560339, "grad_norm": 0.15807606279850006, "learning_rate": 1.3616869808790453e-05, "loss": 0.4623, "step": 5203 }, { "epoch": 1.1608298014722285, "grad_norm": 0.15968340635299683, "learning_rate": 1.3614675607556857e-05, "loss": 0.4693, "step": 5204 }, { "epoch": 1.1610528663841178, "grad_norm": 0.16744199395179749, "learning_rate": 1.3612481206119786e-05, "loss": 0.4821, "step": 5205 }, { "epoch": 1.1612759312960073, "grad_norm": 0.16137133538722992, "learning_rate": 1.3610286604600782e-05, "loss": 0.4757, "step": 5206 }, { "epoch": 1.1614989962078965, "grad_norm": 0.1617872714996338, "learning_rate": 1.3608091803121397e-05, "loss": 0.4703, "step": 5207 }, { "epoch": 1.1617220611197858, "grad_norm": 0.16734254360198975, "learning_rate": 1.360589680180319e-05, "loss": 0.4706, "step": 5208 }, { "epoch": 1.161945126031675, "grad_norm": 0.163192480802536, "learning_rate": 1.3603701600767741e-05, "loss": 0.4722, "step": 5209 }, { "epoch": 1.1621681909435646, "grad_norm": 0.16038168966770172, "learning_rate": 1.3601506200136624e-05, "loss": 0.4443, "step": 5210 }, { "epoch": 1.1623912558554539, "grad_norm": 0.16467641294002533, "learning_rate": 1.3599310600031443e-05, "loss": 0.4465, "step": 5211 }, { "epoch": 1.1626143207673434, "grad_norm": 0.1640377789735794, "learning_rate": 1.3597114800573799e-05, "loss": 0.4722, "step": 5212 }, { "epoch": 1.1628373856792327, "grad_norm": 0.16441082954406738, "learning_rate": 1.359491880188531e-05, "loss": 0.4617, "step": 5213 }, { "epoch": 1.163060450591122, "grad_norm": 0.16120347380638123, "learning_rate": 1.3592722604087604e-05, "loss": 0.4464, "step": 5214 }, { "epoch": 1.1632835155030115, "grad_norm": 0.17573657631874084, "learning_rate": 1.3590526207302324e-05, "loss": 0.5029, "step": 5215 }, { "epoch": 1.1635065804149007, "grad_norm": 0.16203901171684265, "learning_rate": 1.3588329611651117e-05, "loss": 0.4654, "step": 5216 }, { "epoch": 1.16372964532679, "grad_norm": 0.1684379279613495, "learning_rate": 1.3586132817255644e-05, "loss": 0.4706, "step": 5217 }, { "epoch": 1.1639527102386795, "grad_norm": 0.15993818640708923, "learning_rate": 1.3583935824237576e-05, "loss": 0.4587, "step": 5218 }, { "epoch": 1.1641757751505688, "grad_norm": 0.1687501221895218, "learning_rate": 1.35817386327186e-05, "loss": 0.4467, "step": 5219 }, { "epoch": 1.164398840062458, "grad_norm": 0.16527238488197327, "learning_rate": 1.3579541242820407e-05, "loss": 0.4617, "step": 5220 }, { "epoch": 1.1646219049743476, "grad_norm": 0.16829682886600494, "learning_rate": 1.3577343654664705e-05, "loss": 0.4582, "step": 5221 }, { "epoch": 1.1648449698862369, "grad_norm": 0.186698317527771, "learning_rate": 1.3575145868373207e-05, "loss": 0.4878, "step": 5222 }, { "epoch": 1.1650680347981264, "grad_norm": 0.19950494170188904, "learning_rate": 1.3572947884067644e-05, "loss": 0.4636, "step": 5223 }, { "epoch": 1.1652910997100157, "grad_norm": 0.16333967447280884, "learning_rate": 1.3570749701869751e-05, "loss": 0.4772, "step": 5224 }, { "epoch": 1.165514164621905, "grad_norm": 0.16258838772773743, "learning_rate": 1.3568551321901282e-05, "loss": 0.4968, "step": 5225 }, { "epoch": 1.1657372295337942, "grad_norm": 0.18643935024738312, "learning_rate": 1.356635274428399e-05, "loss": 0.4937, "step": 5226 }, { "epoch": 1.1659602944456837, "grad_norm": 0.1727738380432129, "learning_rate": 1.3564153969139654e-05, "loss": 0.4915, "step": 5227 }, { "epoch": 1.166183359357573, "grad_norm": 0.16102762520313263, "learning_rate": 1.3561954996590047e-05, "loss": 0.4534, "step": 5228 }, { "epoch": 1.1664064242694625, "grad_norm": 0.17399117350578308, "learning_rate": 1.3559755826756968e-05, "loss": 0.4775, "step": 5229 }, { "epoch": 1.1666294891813518, "grad_norm": 0.1659899652004242, "learning_rate": 1.3557556459762223e-05, "loss": 0.4624, "step": 5230 }, { "epoch": 1.166852554093241, "grad_norm": 0.17996659874916077, "learning_rate": 1.3555356895727618e-05, "loss": 0.4938, "step": 5231 }, { "epoch": 1.1670756190051306, "grad_norm": 0.16586771607398987, "learning_rate": 1.355315713477499e-05, "loss": 0.4788, "step": 5232 }, { "epoch": 1.1672986839170199, "grad_norm": 0.1771279126405716, "learning_rate": 1.3550957177026164e-05, "loss": 0.4752, "step": 5233 }, { "epoch": 1.1675217488289091, "grad_norm": 0.17474165558815002, "learning_rate": 1.3548757022602997e-05, "loss": 0.4734, "step": 5234 }, { "epoch": 1.1677448137407986, "grad_norm": 0.16958124935626984, "learning_rate": 1.3546556671627341e-05, "loss": 0.4713, "step": 5235 }, { "epoch": 1.167967878652688, "grad_norm": 0.16799892485141754, "learning_rate": 1.354435612422107e-05, "loss": 0.4604, "step": 5236 }, { "epoch": 1.1681909435645772, "grad_norm": 0.17070333659648895, "learning_rate": 1.3542155380506059e-05, "loss": 0.4706, "step": 5237 }, { "epoch": 1.1684140084764667, "grad_norm": 0.1612580418586731, "learning_rate": 1.3539954440604206e-05, "loss": 0.4619, "step": 5238 }, { "epoch": 1.168637073388356, "grad_norm": 0.16792915761470795, "learning_rate": 1.3537753304637406e-05, "loss": 0.4564, "step": 5239 }, { "epoch": 1.1688601383002455, "grad_norm": 0.16356982290744781, "learning_rate": 1.3535551972727577e-05, "loss": 0.4843, "step": 5240 }, { "epoch": 1.1690832032121348, "grad_norm": 0.161970853805542, "learning_rate": 1.3533350444996636e-05, "loss": 0.4573, "step": 5241 }, { "epoch": 1.169306268124024, "grad_norm": 0.17388132214546204, "learning_rate": 1.3531148721566525e-05, "loss": 0.4739, "step": 5242 }, { "epoch": 1.1695293330359133, "grad_norm": 0.1602068692445755, "learning_rate": 1.3528946802559184e-05, "loss": 0.4584, "step": 5243 }, { "epoch": 1.1697523979478028, "grad_norm": 0.1629067063331604, "learning_rate": 1.3526744688096574e-05, "loss": 0.4653, "step": 5244 }, { "epoch": 1.1699754628596921, "grad_norm": 0.16808199882507324, "learning_rate": 1.3524542378300658e-05, "loss": 0.4588, "step": 5245 }, { "epoch": 1.1701985277715816, "grad_norm": 0.17957061529159546, "learning_rate": 1.3522339873293416e-05, "loss": 0.49, "step": 5246 }, { "epoch": 1.170421592683471, "grad_norm": 0.15982255339622498, "learning_rate": 1.3520137173196832e-05, "loss": 0.4456, "step": 5247 }, { "epoch": 1.1706446575953602, "grad_norm": 0.16950003802776337, "learning_rate": 1.3517934278132909e-05, "loss": 0.4802, "step": 5248 }, { "epoch": 1.1708677225072497, "grad_norm": 0.16196782886981964, "learning_rate": 1.351573118822366e-05, "loss": 0.4905, "step": 5249 }, { "epoch": 1.171090787419139, "grad_norm": 0.16096629202365875, "learning_rate": 1.3513527903591101e-05, "loss": 0.4829, "step": 5250 }, { "epoch": 1.1713138523310282, "grad_norm": 0.18132293224334717, "learning_rate": 1.3511324424357269e-05, "loss": 0.485, "step": 5251 }, { "epoch": 1.1715369172429178, "grad_norm": 0.18736086785793304, "learning_rate": 1.3509120750644198e-05, "loss": 0.4569, "step": 5252 }, { "epoch": 1.171759982154807, "grad_norm": 0.1668289601802826, "learning_rate": 1.350691688257395e-05, "loss": 0.4785, "step": 5253 }, { "epoch": 1.1719830470666963, "grad_norm": 0.16315414011478424, "learning_rate": 1.3504712820268584e-05, "loss": 0.4545, "step": 5254 }, { "epoch": 1.1722061119785858, "grad_norm": 0.1799056977033615, "learning_rate": 1.3502508563850179e-05, "loss": 0.5051, "step": 5255 }, { "epoch": 1.172429176890475, "grad_norm": 0.16646715998649597, "learning_rate": 1.3500304113440815e-05, "loss": 0.4531, "step": 5256 }, { "epoch": 1.1726522418023646, "grad_norm": 0.16029830276966095, "learning_rate": 1.3498099469162594e-05, "loss": 0.4614, "step": 5257 }, { "epoch": 1.1728753067142539, "grad_norm": 0.16910234093666077, "learning_rate": 1.3495894631137618e-05, "loss": 0.484, "step": 5258 }, { "epoch": 1.1730983716261432, "grad_norm": 0.16509772837162018, "learning_rate": 1.349368959948801e-05, "loss": 0.4458, "step": 5259 }, { "epoch": 1.1733214365380324, "grad_norm": 0.16832312941551208, "learning_rate": 1.3491484374335893e-05, "loss": 0.4574, "step": 5260 }, { "epoch": 1.173544501449922, "grad_norm": 0.15936195850372314, "learning_rate": 1.3489278955803409e-05, "loss": 0.4441, "step": 5261 }, { "epoch": 1.1737675663618112, "grad_norm": 0.17124436795711517, "learning_rate": 1.348707334401271e-05, "loss": 0.4525, "step": 5262 }, { "epoch": 1.1739906312737007, "grad_norm": 0.16587725281715393, "learning_rate": 1.3484867539085952e-05, "loss": 0.4683, "step": 5263 }, { "epoch": 1.17421369618559, "grad_norm": 0.16889216005802155, "learning_rate": 1.3482661541145315e-05, "loss": 0.4646, "step": 5264 }, { "epoch": 1.1744367610974793, "grad_norm": 0.16621263325214386, "learning_rate": 1.3480455350312968e-05, "loss": 0.4614, "step": 5265 }, { "epoch": 1.1746598260093688, "grad_norm": 0.1614251285791397, "learning_rate": 1.3478248966711115e-05, "loss": 0.4549, "step": 5266 }, { "epoch": 1.174882890921258, "grad_norm": 0.16062335669994354, "learning_rate": 1.3476042390461954e-05, "loss": 0.4592, "step": 5267 }, { "epoch": 1.1751059558331474, "grad_norm": 0.16389133036136627, "learning_rate": 1.3473835621687701e-05, "loss": 0.4582, "step": 5268 }, { "epoch": 1.1753290207450369, "grad_norm": 0.16292013227939606, "learning_rate": 1.3471628660510576e-05, "loss": 0.4444, "step": 5269 }, { "epoch": 1.1755520856569261, "grad_norm": 0.16953033208847046, "learning_rate": 1.3469421507052824e-05, "loss": 0.4735, "step": 5270 }, { "epoch": 1.1757751505688154, "grad_norm": 0.1629514843225479, "learning_rate": 1.346721416143668e-05, "loss": 0.4534, "step": 5271 }, { "epoch": 1.175998215480705, "grad_norm": 0.1695055365562439, "learning_rate": 1.346500662378441e-05, "loss": 0.4498, "step": 5272 }, { "epoch": 1.1762212803925942, "grad_norm": 0.1747894585132599, "learning_rate": 1.3462798894218278e-05, "loss": 0.4773, "step": 5273 }, { "epoch": 1.1764443453044837, "grad_norm": 0.16201353073120117, "learning_rate": 1.3460590972860561e-05, "loss": 0.4401, "step": 5274 }, { "epoch": 1.176667410216373, "grad_norm": 0.16511163115501404, "learning_rate": 1.3458382859833545e-05, "loss": 0.4652, "step": 5275 }, { "epoch": 1.1768904751282623, "grad_norm": 0.17246244847774506, "learning_rate": 1.3456174555259535e-05, "loss": 0.4619, "step": 5276 }, { "epoch": 1.1771135400401516, "grad_norm": 0.16072800755500793, "learning_rate": 1.3453966059260836e-05, "loss": 0.473, "step": 5277 }, { "epoch": 1.177336604952041, "grad_norm": 0.16221876442432404, "learning_rate": 1.345175737195977e-05, "loss": 0.4588, "step": 5278 }, { "epoch": 1.1775596698639303, "grad_norm": 0.16264645755290985, "learning_rate": 1.344954849347867e-05, "loss": 0.4536, "step": 5279 }, { "epoch": 1.1777827347758199, "grad_norm": 0.1796528697013855, "learning_rate": 1.3447339423939876e-05, "loss": 0.4775, "step": 5280 }, { "epoch": 1.1780057996877091, "grad_norm": 0.1555919647216797, "learning_rate": 1.3445130163465739e-05, "loss": 0.4576, "step": 5281 }, { "epoch": 1.1782288645995984, "grad_norm": 0.16730302572250366, "learning_rate": 1.3442920712178622e-05, "loss": 0.4591, "step": 5282 }, { "epoch": 1.178451929511488, "grad_norm": 0.16881461441516876, "learning_rate": 1.3440711070200903e-05, "loss": 0.4516, "step": 5283 }, { "epoch": 1.1786749944233772, "grad_norm": 0.1790027767419815, "learning_rate": 1.3438501237654958e-05, "loss": 0.4627, "step": 5284 }, { "epoch": 1.1788980593352665, "grad_norm": 0.1679370403289795, "learning_rate": 1.3436291214663186e-05, "loss": 0.4759, "step": 5285 }, { "epoch": 1.179121124247156, "grad_norm": 0.17167186737060547, "learning_rate": 1.3434081001347992e-05, "loss": 0.4687, "step": 5286 }, { "epoch": 1.1793441891590453, "grad_norm": 0.1699121594429016, "learning_rate": 1.3431870597831792e-05, "loss": 0.4634, "step": 5287 }, { "epoch": 1.1795672540709345, "grad_norm": 0.15923888981342316, "learning_rate": 1.3429660004237008e-05, "loss": 0.4293, "step": 5288 }, { "epoch": 1.179790318982824, "grad_norm": 0.15723690390586853, "learning_rate": 1.3427449220686085e-05, "loss": 0.4476, "step": 5289 }, { "epoch": 1.1800133838947133, "grad_norm": 0.16163286566734314, "learning_rate": 1.342523824730146e-05, "loss": 0.4554, "step": 5290 }, { "epoch": 1.1802364488066028, "grad_norm": 0.16615921258926392, "learning_rate": 1.3423027084205597e-05, "loss": 0.4751, "step": 5291 }, { "epoch": 1.1804595137184921, "grad_norm": 0.1613457053899765, "learning_rate": 1.3420815731520965e-05, "loss": 0.462, "step": 5292 }, { "epoch": 1.1806825786303814, "grad_norm": 0.16468282043933868, "learning_rate": 1.3418604189370034e-05, "loss": 0.4773, "step": 5293 }, { "epoch": 1.1809056435422707, "grad_norm": 0.1656079739332199, "learning_rate": 1.3416392457875302e-05, "loss": 0.4593, "step": 5294 }, { "epoch": 1.1811287084541602, "grad_norm": 0.15908241271972656, "learning_rate": 1.3414180537159265e-05, "loss": 0.4527, "step": 5295 }, { "epoch": 1.1813517733660495, "grad_norm": 0.181619331240654, "learning_rate": 1.3411968427344438e-05, "loss": 0.4779, "step": 5296 }, { "epoch": 1.181574838277939, "grad_norm": 0.16857735812664032, "learning_rate": 1.3409756128553331e-05, "loss": 0.4994, "step": 5297 }, { "epoch": 1.1817979031898282, "grad_norm": 0.16610047221183777, "learning_rate": 1.3407543640908485e-05, "loss": 0.4669, "step": 5298 }, { "epoch": 1.1820209681017175, "grad_norm": 0.16683466732501984, "learning_rate": 1.3405330964532437e-05, "loss": 0.4855, "step": 5299 }, { "epoch": 1.182244033013607, "grad_norm": 0.1574050635099411, "learning_rate": 1.340311809954774e-05, "loss": 0.4539, "step": 5300 }, { "epoch": 1.1824670979254963, "grad_norm": 0.16250169277191162, "learning_rate": 1.3400905046076955e-05, "loss": 0.4827, "step": 5301 }, { "epoch": 1.1826901628373856, "grad_norm": 0.179072305560112, "learning_rate": 1.3398691804242658e-05, "loss": 0.4821, "step": 5302 }, { "epoch": 1.182913227749275, "grad_norm": 0.15977934002876282, "learning_rate": 1.339647837416743e-05, "loss": 0.436, "step": 5303 }, { "epoch": 1.1831362926611644, "grad_norm": 0.16551627218723297, "learning_rate": 1.3394264755973864e-05, "loss": 0.473, "step": 5304 }, { "epoch": 1.1833593575730537, "grad_norm": 0.16227102279663086, "learning_rate": 1.3392050949784566e-05, "loss": 0.4693, "step": 5305 }, { "epoch": 1.1835824224849432, "grad_norm": 0.16538846492767334, "learning_rate": 1.3389836955722149e-05, "loss": 0.4523, "step": 5306 }, { "epoch": 1.1838054873968324, "grad_norm": 0.1663748174905777, "learning_rate": 1.3387622773909237e-05, "loss": 0.4656, "step": 5307 }, { "epoch": 1.184028552308722, "grad_norm": 0.1712309867143631, "learning_rate": 1.338540840446847e-05, "loss": 0.4611, "step": 5308 }, { "epoch": 1.1842516172206112, "grad_norm": 0.1841256320476532, "learning_rate": 1.3383193847522487e-05, "loss": 0.4955, "step": 5309 }, { "epoch": 1.1844746821325005, "grad_norm": 0.151309996843338, "learning_rate": 1.338097910319395e-05, "loss": 0.4323, "step": 5310 }, { "epoch": 1.1846977470443898, "grad_norm": 0.16778849065303802, "learning_rate": 1.337876417160552e-05, "loss": 0.4633, "step": 5311 }, { "epoch": 1.1849208119562793, "grad_norm": 0.16986416280269623, "learning_rate": 1.3376549052879883e-05, "loss": 0.4243, "step": 5312 }, { "epoch": 1.1851438768681686, "grad_norm": 0.1710985153913498, "learning_rate": 1.3374333747139714e-05, "loss": 0.4569, "step": 5313 }, { "epoch": 1.185366941780058, "grad_norm": 0.1675567626953125, "learning_rate": 1.3372118254507718e-05, "loss": 0.4747, "step": 5314 }, { "epoch": 1.1855900066919474, "grad_norm": 0.19594267010688782, "learning_rate": 1.3369902575106604e-05, "loss": 0.4645, "step": 5315 }, { "epoch": 1.1858130716038366, "grad_norm": 0.1636292189359665, "learning_rate": 1.3367686709059084e-05, "loss": 0.4467, "step": 5316 }, { "epoch": 1.1860361365157261, "grad_norm": 0.16308671236038208, "learning_rate": 1.3365470656487896e-05, "loss": 0.4704, "step": 5317 }, { "epoch": 1.1862592014276154, "grad_norm": 0.16027814149856567, "learning_rate": 1.3363254417515769e-05, "loss": 0.4726, "step": 5318 }, { "epoch": 1.1864822663395047, "grad_norm": 0.16369383037090302, "learning_rate": 1.336103799226546e-05, "loss": 0.4467, "step": 5319 }, { "epoch": 1.1867053312513942, "grad_norm": 0.1744934618473053, "learning_rate": 1.335882138085972e-05, "loss": 0.4677, "step": 5320 }, { "epoch": 1.1869283961632835, "grad_norm": 0.17109525203704834, "learning_rate": 1.3356604583421331e-05, "loss": 0.4587, "step": 5321 }, { "epoch": 1.1871514610751728, "grad_norm": 0.17207783460617065, "learning_rate": 1.335438760007306e-05, "loss": 0.4468, "step": 5322 }, { "epoch": 1.1873745259870623, "grad_norm": 0.16258476674556732, "learning_rate": 1.3352170430937707e-05, "loss": 0.4498, "step": 5323 }, { "epoch": 1.1875975908989516, "grad_norm": 0.1659199595451355, "learning_rate": 1.334995307613807e-05, "loss": 0.4492, "step": 5324 }, { "epoch": 1.187820655810841, "grad_norm": 0.18148742616176605, "learning_rate": 1.3347735535796957e-05, "loss": 0.4785, "step": 5325 }, { "epoch": 1.1880437207227303, "grad_norm": 0.16647182404994965, "learning_rate": 1.3345517810037194e-05, "loss": 0.4375, "step": 5326 }, { "epoch": 1.1882667856346196, "grad_norm": 0.16515585780143738, "learning_rate": 1.334329989898161e-05, "loss": 0.4653, "step": 5327 }, { "epoch": 1.1884898505465091, "grad_norm": 0.161190927028656, "learning_rate": 1.3341081802753046e-05, "loss": 0.4448, "step": 5328 }, { "epoch": 1.1887129154583984, "grad_norm": 0.1584840565919876, "learning_rate": 1.333886352147436e-05, "loss": 0.446, "step": 5329 }, { "epoch": 1.1889359803702877, "grad_norm": 0.16086257994174957, "learning_rate": 1.3336645055268405e-05, "loss": 0.461, "step": 5330 }, { "epoch": 1.1891590452821772, "grad_norm": 0.17658580839633942, "learning_rate": 1.333442640425806e-05, "loss": 0.4905, "step": 5331 }, { "epoch": 1.1893821101940665, "grad_norm": 0.17238235473632812, "learning_rate": 1.3332207568566209e-05, "loss": 0.4966, "step": 5332 }, { "epoch": 1.1896051751059558, "grad_norm": 0.16364935040473938, "learning_rate": 1.332998854831574e-05, "loss": 0.4554, "step": 5333 }, { "epoch": 1.1898282400178453, "grad_norm": 0.19050312042236328, "learning_rate": 1.3327769343629559e-05, "loss": 0.4485, "step": 5334 }, { "epoch": 1.1900513049297345, "grad_norm": 0.17412003874778748, "learning_rate": 1.3325549954630579e-05, "loss": 0.4776, "step": 5335 }, { "epoch": 1.1902743698416238, "grad_norm": 0.16760744154453278, "learning_rate": 1.3323330381441723e-05, "loss": 0.4481, "step": 5336 }, { "epoch": 1.1904974347535133, "grad_norm": 0.16635169088840485, "learning_rate": 1.3321110624185927e-05, "loss": 0.4572, "step": 5337 }, { "epoch": 1.1907204996654026, "grad_norm": 0.1623852401971817, "learning_rate": 1.3318890682986135e-05, "loss": 0.455, "step": 5338 }, { "epoch": 1.190943564577292, "grad_norm": 0.1643582582473755, "learning_rate": 1.3316670557965299e-05, "loss": 0.4356, "step": 5339 }, { "epoch": 1.1911666294891814, "grad_norm": 0.1604517102241516, "learning_rate": 1.3314450249246385e-05, "loss": 0.4334, "step": 5340 }, { "epoch": 1.1913896944010707, "grad_norm": 0.16572065651416779, "learning_rate": 1.3312229756952366e-05, "loss": 0.467, "step": 5341 }, { "epoch": 1.1916127593129602, "grad_norm": 0.1680530309677124, "learning_rate": 1.3310009081206232e-05, "loss": 0.4546, "step": 5342 }, { "epoch": 1.1918358242248495, "grad_norm": 0.16786667704582214, "learning_rate": 1.330778822213097e-05, "loss": 0.4519, "step": 5343 }, { "epoch": 1.1920588891367387, "grad_norm": 0.17252503335475922, "learning_rate": 1.3305567179849594e-05, "loss": 0.4748, "step": 5344 }, { "epoch": 1.1922819540486282, "grad_norm": 0.16872383654117584, "learning_rate": 1.3303345954485113e-05, "loss": 0.4529, "step": 5345 }, { "epoch": 1.1925050189605175, "grad_norm": 0.1684456765651703, "learning_rate": 1.330112454616055e-05, "loss": 0.4696, "step": 5346 }, { "epoch": 1.1927280838724068, "grad_norm": 0.16428877413272858, "learning_rate": 1.3298902954998951e-05, "loss": 0.4595, "step": 5347 }, { "epoch": 1.1929511487842963, "grad_norm": 0.17616313695907593, "learning_rate": 1.329668118112335e-05, "loss": 0.4696, "step": 5348 }, { "epoch": 1.1931742136961856, "grad_norm": 0.18709328770637512, "learning_rate": 1.3294459224656813e-05, "loss": 0.4847, "step": 5349 }, { "epoch": 1.1933972786080749, "grad_norm": 0.17944732308387756, "learning_rate": 1.3292237085722396e-05, "loss": 0.4338, "step": 5350 }, { "epoch": 1.1936203435199644, "grad_norm": 0.1728639304637909, "learning_rate": 1.3290014764443186e-05, "loss": 0.4658, "step": 5351 }, { "epoch": 1.1938434084318537, "grad_norm": 0.17052049934864044, "learning_rate": 1.328779226094226e-05, "loss": 0.4922, "step": 5352 }, { "epoch": 1.194066473343743, "grad_norm": 0.16665810346603394, "learning_rate": 1.3285569575342719e-05, "loss": 0.485, "step": 5353 }, { "epoch": 1.1942895382556324, "grad_norm": 0.17398186028003693, "learning_rate": 1.3283346707767666e-05, "loss": 0.4587, "step": 5354 }, { "epoch": 1.1945126031675217, "grad_norm": 0.1634979397058487, "learning_rate": 1.3281123658340222e-05, "loss": 0.4309, "step": 5355 }, { "epoch": 1.194735668079411, "grad_norm": 0.20972603559494019, "learning_rate": 1.3278900427183507e-05, "loss": 0.4808, "step": 5356 }, { "epoch": 1.1949587329913005, "grad_norm": 0.17813637852668762, "learning_rate": 1.3276677014420665e-05, "loss": 0.4556, "step": 5357 }, { "epoch": 1.1951817979031898, "grad_norm": 0.18965846300125122, "learning_rate": 1.3274453420174835e-05, "loss": 0.4716, "step": 5358 }, { "epoch": 1.1954048628150793, "grad_norm": 0.16441969573497772, "learning_rate": 1.3272229644569182e-05, "loss": 0.4379, "step": 5359 }, { "epoch": 1.1956279277269686, "grad_norm": 0.17030787467956543, "learning_rate": 1.3270005687726864e-05, "loss": 0.4506, "step": 5360 }, { "epoch": 1.1958509926388579, "grad_norm": 0.1666693389415741, "learning_rate": 1.3267781549771064e-05, "loss": 0.471, "step": 5361 }, { "epoch": 1.1960740575507474, "grad_norm": 0.1696898639202118, "learning_rate": 1.3265557230824967e-05, "loss": 0.4808, "step": 5362 }, { "epoch": 1.1962971224626366, "grad_norm": 0.17153112590312958, "learning_rate": 1.326333273101177e-05, "loss": 0.4762, "step": 5363 }, { "epoch": 1.196520187374526, "grad_norm": 0.1648848056793213, "learning_rate": 1.3261108050454674e-05, "loss": 0.4448, "step": 5364 }, { "epoch": 1.1967432522864154, "grad_norm": 0.17086121439933777, "learning_rate": 1.3258883189276906e-05, "loss": 0.4848, "step": 5365 }, { "epoch": 1.1969663171983047, "grad_norm": 0.16545124351978302, "learning_rate": 1.3256658147601686e-05, "loss": 0.4797, "step": 5366 }, { "epoch": 1.197189382110194, "grad_norm": 0.19573906064033508, "learning_rate": 1.3254432925552252e-05, "loss": 0.4813, "step": 5367 }, { "epoch": 1.1974124470220835, "grad_norm": 0.16083821654319763, "learning_rate": 1.3252207523251854e-05, "loss": 0.4509, "step": 5368 }, { "epoch": 1.1976355119339728, "grad_norm": 0.1647178679704666, "learning_rate": 1.3249981940823742e-05, "loss": 0.4506, "step": 5369 }, { "epoch": 1.197858576845862, "grad_norm": 0.16676411032676697, "learning_rate": 1.3247756178391192e-05, "loss": 0.4696, "step": 5370 }, { "epoch": 1.1980816417577516, "grad_norm": 0.16701491177082062, "learning_rate": 1.3245530236077474e-05, "loss": 0.4642, "step": 5371 }, { "epoch": 1.1983047066696408, "grad_norm": 0.16053996980190277, "learning_rate": 1.3243304114005878e-05, "loss": 0.449, "step": 5372 }, { "epoch": 1.1985277715815301, "grad_norm": 0.1547413319349289, "learning_rate": 1.3241077812299694e-05, "loss": 0.4496, "step": 5373 }, { "epoch": 1.1987508364934196, "grad_norm": 0.16424560546875, "learning_rate": 1.3238851331082237e-05, "loss": 0.4802, "step": 5374 }, { "epoch": 1.198973901405309, "grad_norm": 0.1648808866739273, "learning_rate": 1.3236624670476819e-05, "loss": 0.4574, "step": 5375 }, { "epoch": 1.1991969663171984, "grad_norm": 0.1601562201976776, "learning_rate": 1.323439783060677e-05, "loss": 0.4553, "step": 5376 }, { "epoch": 1.1994200312290877, "grad_norm": 0.16455237567424774, "learning_rate": 1.323217081159542e-05, "loss": 0.4666, "step": 5377 }, { "epoch": 1.199643096140977, "grad_norm": 0.16123172640800476, "learning_rate": 1.3229943613566118e-05, "loss": 0.4797, "step": 5378 }, { "epoch": 1.1998661610528665, "grad_norm": 0.18106834590435028, "learning_rate": 1.3227716236642226e-05, "loss": 0.476, "step": 5379 }, { "epoch": 1.2000892259647558, "grad_norm": 0.1630832701921463, "learning_rate": 1.3225488680947103e-05, "loss": 0.4995, "step": 5380 }, { "epoch": 1.200312290876645, "grad_norm": 0.16625766456127167, "learning_rate": 1.322326094660413e-05, "loss": 0.4662, "step": 5381 }, { "epoch": 1.2005353557885345, "grad_norm": 0.16396591067314148, "learning_rate": 1.3221033033736688e-05, "loss": 0.4631, "step": 5382 }, { "epoch": 1.2007584207004238, "grad_norm": 0.161235049366951, "learning_rate": 1.321880494246818e-05, "loss": 0.4481, "step": 5383 }, { "epoch": 1.200981485612313, "grad_norm": 0.16487468779087067, "learning_rate": 1.3216576672922002e-05, "loss": 0.463, "step": 5384 }, { "epoch": 1.2012045505242026, "grad_norm": 0.16097383201122284, "learning_rate": 1.3214348225221578e-05, "loss": 0.4517, "step": 5385 }, { "epoch": 1.201427615436092, "grad_norm": 0.1607978343963623, "learning_rate": 1.3212119599490327e-05, "loss": 0.4775, "step": 5386 }, { "epoch": 1.2016506803479812, "grad_norm": 0.1695454865694046, "learning_rate": 1.3209890795851693e-05, "loss": 0.4667, "step": 5387 }, { "epoch": 1.2018737452598707, "grad_norm": 0.16572801768779755, "learning_rate": 1.3207661814429112e-05, "loss": 0.4623, "step": 5388 }, { "epoch": 1.20209681017176, "grad_norm": 0.1595952957868576, "learning_rate": 1.3205432655346044e-05, "loss": 0.4801, "step": 5389 }, { "epoch": 1.2023198750836492, "grad_norm": 0.16465139389038086, "learning_rate": 1.3203203318725951e-05, "loss": 0.4801, "step": 5390 }, { "epoch": 1.2025429399955387, "grad_norm": 0.1634162813425064, "learning_rate": 1.320097380469231e-05, "loss": 0.4807, "step": 5391 }, { "epoch": 1.202766004907428, "grad_norm": 0.16548092663288116, "learning_rate": 1.3198744113368604e-05, "loss": 0.4569, "step": 5392 }, { "epoch": 1.2029890698193175, "grad_norm": 0.16287754476070404, "learning_rate": 1.319651424487833e-05, "loss": 0.4539, "step": 5393 }, { "epoch": 1.2032121347312068, "grad_norm": 0.16865308582782745, "learning_rate": 1.3194284199344987e-05, "loss": 0.4555, "step": 5394 }, { "epoch": 1.203435199643096, "grad_norm": 0.16417443752288818, "learning_rate": 1.3192053976892097e-05, "loss": 0.4449, "step": 5395 }, { "epoch": 1.2036582645549856, "grad_norm": 0.17047269642353058, "learning_rate": 1.3189823577643175e-05, "loss": 0.4669, "step": 5396 }, { "epoch": 1.2038813294668749, "grad_norm": 0.15792153775691986, "learning_rate": 1.3187593001721762e-05, "loss": 0.4303, "step": 5397 }, { "epoch": 1.2041043943787642, "grad_norm": 0.16310684382915497, "learning_rate": 1.3185362249251395e-05, "loss": 0.4373, "step": 5398 }, { "epoch": 1.2043274592906537, "grad_norm": 0.15764662623405457, "learning_rate": 1.3183131320355629e-05, "loss": 0.4409, "step": 5399 }, { "epoch": 1.204550524202543, "grad_norm": 0.17336583137512207, "learning_rate": 1.3180900215158028e-05, "loss": 0.4655, "step": 5400 }, { "epoch": 1.2047735891144322, "grad_norm": 0.1825001984834671, "learning_rate": 1.3178668933782166e-05, "loss": 0.4676, "step": 5401 }, { "epoch": 1.2049966540263217, "grad_norm": 0.16924050450325012, "learning_rate": 1.3176437476351625e-05, "loss": 0.4762, "step": 5402 }, { "epoch": 1.205219718938211, "grad_norm": 0.163119837641716, "learning_rate": 1.3174205842989993e-05, "loss": 0.4078, "step": 5403 }, { "epoch": 1.2054427838501003, "grad_norm": 0.16703581809997559, "learning_rate": 1.3171974033820878e-05, "loss": 0.4769, "step": 5404 }, { "epoch": 1.2056658487619898, "grad_norm": 0.1679922491312027, "learning_rate": 1.3169742048967886e-05, "loss": 0.4776, "step": 5405 }, { "epoch": 1.205888913673879, "grad_norm": 0.16275057196617126, "learning_rate": 1.3167509888554641e-05, "loss": 0.4646, "step": 5406 }, { "epoch": 1.2061119785857684, "grad_norm": 0.16767756640911102, "learning_rate": 1.3165277552704774e-05, "loss": 0.4401, "step": 5407 }, { "epoch": 1.2063350434976579, "grad_norm": 0.16466595232486725, "learning_rate": 1.3163045041541929e-05, "loss": 0.4626, "step": 5408 }, { "epoch": 1.2065581084095471, "grad_norm": 0.17687217891216278, "learning_rate": 1.316081235518975e-05, "loss": 0.4553, "step": 5409 }, { "epoch": 1.2067811733214366, "grad_norm": 0.16307489573955536, "learning_rate": 1.3158579493771901e-05, "loss": 0.4444, "step": 5410 }, { "epoch": 1.207004238233326, "grad_norm": 0.21108053624629974, "learning_rate": 1.3156346457412051e-05, "loss": 0.4845, "step": 5411 }, { "epoch": 1.2072273031452152, "grad_norm": 0.1662529855966568, "learning_rate": 1.3154113246233881e-05, "loss": 0.4673, "step": 5412 }, { "epoch": 1.2074503680571047, "grad_norm": 0.1835554838180542, "learning_rate": 1.3151879860361078e-05, "loss": 0.4679, "step": 5413 }, { "epoch": 1.207673432968994, "grad_norm": 0.16841189563274384, "learning_rate": 1.3149646299917342e-05, "loss": 0.4932, "step": 5414 }, { "epoch": 1.2078964978808833, "grad_norm": 0.15623950958251953, "learning_rate": 1.3147412565026385e-05, "loss": 0.4396, "step": 5415 }, { "epoch": 1.2081195627927728, "grad_norm": 0.1630774587392807, "learning_rate": 1.3145178655811921e-05, "loss": 0.459, "step": 5416 }, { "epoch": 1.208342627704662, "grad_norm": 0.16008590161800385, "learning_rate": 1.3142944572397677e-05, "loss": 0.4662, "step": 5417 }, { "epoch": 1.2085656926165513, "grad_norm": 0.16384388506412506, "learning_rate": 1.3140710314907392e-05, "loss": 0.4481, "step": 5418 }, { "epoch": 1.2087887575284408, "grad_norm": 0.1759367287158966, "learning_rate": 1.3138475883464818e-05, "loss": 0.4766, "step": 5419 }, { "epoch": 1.2090118224403301, "grad_norm": 0.1597670316696167, "learning_rate": 1.3136241278193704e-05, "loss": 0.4521, "step": 5420 }, { "epoch": 1.2092348873522194, "grad_norm": 0.1790318489074707, "learning_rate": 1.3134006499217824e-05, "loss": 0.4404, "step": 5421 }, { "epoch": 1.209457952264109, "grad_norm": 0.16232487559318542, "learning_rate": 1.3131771546660947e-05, "loss": 0.4436, "step": 5422 }, { "epoch": 1.2096810171759982, "grad_norm": 0.16369551420211792, "learning_rate": 1.3129536420646862e-05, "loss": 0.4754, "step": 5423 }, { "epoch": 1.2099040820878875, "grad_norm": 0.1614258736371994, "learning_rate": 1.3127301121299367e-05, "loss": 0.4436, "step": 5424 }, { "epoch": 1.210127146999777, "grad_norm": 0.163519024848938, "learning_rate": 1.3125065648742263e-05, "loss": 0.4443, "step": 5425 }, { "epoch": 1.2103502119116663, "grad_norm": 0.16275057196617126, "learning_rate": 1.3122830003099364e-05, "loss": 0.4632, "step": 5426 }, { "epoch": 1.2105732768235558, "grad_norm": 0.1647331267595291, "learning_rate": 1.3120594184494499e-05, "loss": 0.4644, "step": 5427 }, { "epoch": 1.210796341735445, "grad_norm": 0.16043558716773987, "learning_rate": 1.3118358193051499e-05, "loss": 0.4691, "step": 5428 }, { "epoch": 1.2110194066473343, "grad_norm": 0.1672472357749939, "learning_rate": 1.3116122028894206e-05, "loss": 0.4663, "step": 5429 }, { "epoch": 1.2112424715592238, "grad_norm": 0.16601301729679108, "learning_rate": 1.3113885692146473e-05, "loss": 0.4594, "step": 5430 }, { "epoch": 1.211465536471113, "grad_norm": 0.16756746172904968, "learning_rate": 1.3111649182932163e-05, "loss": 0.4555, "step": 5431 }, { "epoch": 1.2116886013830024, "grad_norm": 0.1739458590745926, "learning_rate": 1.3109412501375149e-05, "loss": 0.4703, "step": 5432 }, { "epoch": 1.2119116662948919, "grad_norm": 0.16856876015663147, "learning_rate": 1.3107175647599311e-05, "loss": 0.448, "step": 5433 }, { "epoch": 1.2121347312067812, "grad_norm": 0.16722862422466278, "learning_rate": 1.3104938621728542e-05, "loss": 0.4598, "step": 5434 }, { "epoch": 1.2123577961186704, "grad_norm": 0.16905224323272705, "learning_rate": 1.310270142388674e-05, "loss": 0.4782, "step": 5435 }, { "epoch": 1.21258086103056, "grad_norm": 0.16216522455215454, "learning_rate": 1.3100464054197819e-05, "loss": 0.4755, "step": 5436 }, { "epoch": 1.2128039259424492, "grad_norm": 0.17112210392951965, "learning_rate": 1.3098226512785695e-05, "loss": 0.4774, "step": 5437 }, { "epoch": 1.2130269908543385, "grad_norm": 0.16165569424629211, "learning_rate": 1.3095988799774296e-05, "loss": 0.4723, "step": 5438 }, { "epoch": 1.213250055766228, "grad_norm": 0.16415071487426758, "learning_rate": 1.3093750915287565e-05, "loss": 0.4649, "step": 5439 }, { "epoch": 1.2134731206781173, "grad_norm": 0.23170824348926544, "learning_rate": 1.3091512859449447e-05, "loss": 0.4754, "step": 5440 }, { "epoch": 1.2136961855900066, "grad_norm": 0.15743504464626312, "learning_rate": 1.30892746323839e-05, "loss": 0.4367, "step": 5441 }, { "epoch": 1.213919250501896, "grad_norm": 0.1775827705860138, "learning_rate": 1.3087036234214892e-05, "loss": 0.468, "step": 5442 }, { "epoch": 1.2141423154137854, "grad_norm": 0.16368676722049713, "learning_rate": 1.3084797665066398e-05, "loss": 0.4635, "step": 5443 }, { "epoch": 1.2143653803256749, "grad_norm": 0.15797144174575806, "learning_rate": 1.3082558925062406e-05, "loss": 0.4378, "step": 5444 }, { "epoch": 1.2145884452375642, "grad_norm": 0.16582882404327393, "learning_rate": 1.308032001432691e-05, "loss": 0.4508, "step": 5445 }, { "epoch": 1.2148115101494534, "grad_norm": 0.16783961653709412, "learning_rate": 1.307808093298392e-05, "loss": 0.478, "step": 5446 }, { "epoch": 1.215034575061343, "grad_norm": 0.29538553953170776, "learning_rate": 1.307584168115744e-05, "loss": 0.4617, "step": 5447 }, { "epoch": 1.2152576399732322, "grad_norm": 0.16198082268238068, "learning_rate": 1.3073602258971503e-05, "loss": 0.4605, "step": 5448 }, { "epoch": 1.2154807048851215, "grad_norm": 0.16145728528499603, "learning_rate": 1.3071362666550136e-05, "loss": 0.4702, "step": 5449 }, { "epoch": 1.215703769797011, "grad_norm": 0.16520316898822784, "learning_rate": 1.3069122904017389e-05, "loss": 0.4913, "step": 5450 }, { "epoch": 1.2159268347089003, "grad_norm": 0.1647755205631256, "learning_rate": 1.3066882971497308e-05, "loss": 0.4538, "step": 5451 }, { "epoch": 1.2161498996207896, "grad_norm": 0.16983206570148468, "learning_rate": 1.3064642869113955e-05, "loss": 0.4531, "step": 5452 }, { "epoch": 1.216372964532679, "grad_norm": 0.16914002597332, "learning_rate": 1.3062402596991407e-05, "loss": 0.4643, "step": 5453 }, { "epoch": 1.2165960294445683, "grad_norm": 0.16433201730251312, "learning_rate": 1.3060162155253738e-05, "loss": 0.4398, "step": 5454 }, { "epoch": 1.2168190943564579, "grad_norm": 0.1646953821182251, "learning_rate": 1.305792154402504e-05, "loss": 0.4704, "step": 5455 }, { "epoch": 1.2170421592683471, "grad_norm": 0.1791263222694397, "learning_rate": 1.3055680763429411e-05, "loss": 0.4338, "step": 5456 }, { "epoch": 1.2172652241802364, "grad_norm": 0.16176985204219818, "learning_rate": 1.305343981359096e-05, "loss": 0.4534, "step": 5457 }, { "epoch": 1.2174882890921257, "grad_norm": 0.16253705322742462, "learning_rate": 1.3051198694633804e-05, "loss": 0.4728, "step": 5458 }, { "epoch": 1.2177113540040152, "grad_norm": 0.1789017915725708, "learning_rate": 1.3048957406682074e-05, "loss": 0.4333, "step": 5459 }, { "epoch": 1.2179344189159045, "grad_norm": 0.16050660610198975, "learning_rate": 1.3046715949859902e-05, "loss": 0.4614, "step": 5460 }, { "epoch": 1.218157483827794, "grad_norm": 0.15737925469875336, "learning_rate": 1.3044474324291438e-05, "loss": 0.4558, "step": 5461 }, { "epoch": 1.2183805487396833, "grad_norm": 0.16488178074359894, "learning_rate": 1.3042232530100833e-05, "loss": 0.4804, "step": 5462 }, { "epoch": 1.2186036136515725, "grad_norm": 0.16783639788627625, "learning_rate": 1.3039990567412255e-05, "loss": 0.4564, "step": 5463 }, { "epoch": 1.218826678563462, "grad_norm": 0.16743387281894684, "learning_rate": 1.303774843634988e-05, "loss": 0.4568, "step": 5464 }, { "epoch": 1.2190497434753513, "grad_norm": 0.16670465469360352, "learning_rate": 1.3035506137037883e-05, "loss": 0.4416, "step": 5465 }, { "epoch": 1.2192728083872406, "grad_norm": 0.17037998139858246, "learning_rate": 1.3033263669600466e-05, "loss": 0.4753, "step": 5466 }, { "epoch": 1.2194958732991301, "grad_norm": 0.17123369872570038, "learning_rate": 1.3031021034161827e-05, "loss": 0.4718, "step": 5467 }, { "epoch": 1.2197189382110194, "grad_norm": 0.17543406784534454, "learning_rate": 1.3028778230846178e-05, "loss": 0.4564, "step": 5468 }, { "epoch": 1.2199420031229087, "grad_norm": 0.17404866218566895, "learning_rate": 1.3026535259777734e-05, "loss": 0.4716, "step": 5469 }, { "epoch": 1.2201650680347982, "grad_norm": 0.1693383902311325, "learning_rate": 1.3024292121080735e-05, "loss": 0.4759, "step": 5470 }, { "epoch": 1.2203881329466875, "grad_norm": 0.18779967725276947, "learning_rate": 1.3022048814879412e-05, "loss": 0.4784, "step": 5471 }, { "epoch": 1.220611197858577, "grad_norm": 0.16473285853862762, "learning_rate": 1.3019805341298017e-05, "loss": 0.4334, "step": 5472 }, { "epoch": 1.2208342627704662, "grad_norm": 0.1686456799507141, "learning_rate": 1.3017561700460806e-05, "loss": 0.4744, "step": 5473 }, { "epoch": 1.2210573276823555, "grad_norm": 0.16365700960159302, "learning_rate": 1.301531789249205e-05, "loss": 0.4711, "step": 5474 }, { "epoch": 1.2212803925942448, "grad_norm": 0.16734614968299866, "learning_rate": 1.3013073917516018e-05, "loss": 0.4512, "step": 5475 }, { "epoch": 1.2215034575061343, "grad_norm": 0.1621614247560501, "learning_rate": 1.3010829775657001e-05, "loss": 0.4579, "step": 5476 }, { "epoch": 1.2217265224180236, "grad_norm": 0.16041643917560577, "learning_rate": 1.3008585467039291e-05, "loss": 0.4723, "step": 5477 }, { "epoch": 1.221949587329913, "grad_norm": 0.17315927147865295, "learning_rate": 1.3006340991787196e-05, "loss": 0.4888, "step": 5478 }, { "epoch": 1.2221726522418024, "grad_norm": 0.16364838182926178, "learning_rate": 1.3004096350025024e-05, "loss": 0.4456, "step": 5479 }, { "epoch": 1.2223957171536917, "grad_norm": 0.17080283164978027, "learning_rate": 1.30018515418771e-05, "loss": 0.4545, "step": 5480 }, { "epoch": 1.2226187820655812, "grad_norm": 0.1685846894979477, "learning_rate": 1.2999606567467753e-05, "loss": 0.4709, "step": 5481 }, { "epoch": 1.2228418469774704, "grad_norm": 0.16784535348415375, "learning_rate": 1.2997361426921331e-05, "loss": 0.4753, "step": 5482 }, { "epoch": 1.2230649118893597, "grad_norm": 0.17074494063854218, "learning_rate": 1.2995116120362175e-05, "loss": 0.4519, "step": 5483 }, { "epoch": 1.2232879768012492, "grad_norm": 0.15669210255146027, "learning_rate": 1.2992870647914648e-05, "loss": 0.4205, "step": 5484 }, { "epoch": 1.2235110417131385, "grad_norm": 0.16939307749271393, "learning_rate": 1.2990625009703122e-05, "loss": 0.4685, "step": 5485 }, { "epoch": 1.2237341066250278, "grad_norm": 0.1645343005657196, "learning_rate": 1.298837920585197e-05, "loss": 0.4713, "step": 5486 }, { "epoch": 1.2239571715369173, "grad_norm": 0.16123943030834198, "learning_rate": 1.298613323648558e-05, "loss": 0.4592, "step": 5487 }, { "epoch": 1.2241802364488066, "grad_norm": 0.16125601530075073, "learning_rate": 1.2983887101728342e-05, "loss": 0.4522, "step": 5488 }, { "epoch": 1.224403301360696, "grad_norm": 0.16092932224273682, "learning_rate": 1.2981640801704676e-05, "loss": 0.4469, "step": 5489 }, { "epoch": 1.2246263662725854, "grad_norm": 0.16154557466506958, "learning_rate": 1.2979394336538982e-05, "loss": 0.5035, "step": 5490 }, { "epoch": 1.2248494311844746, "grad_norm": 0.16762235760688782, "learning_rate": 1.2977147706355688e-05, "loss": 0.4505, "step": 5491 }, { "epoch": 1.225072496096364, "grad_norm": 0.16036982834339142, "learning_rate": 1.297490091127923e-05, "loss": 0.4635, "step": 5492 }, { "epoch": 1.2252955610082534, "grad_norm": 0.16005462408065796, "learning_rate": 1.2972653951434046e-05, "loss": 0.4745, "step": 5493 }, { "epoch": 1.2255186259201427, "grad_norm": 0.16705186665058136, "learning_rate": 1.2970406826944589e-05, "loss": 0.4684, "step": 5494 }, { "epoch": 1.2257416908320322, "grad_norm": 0.16716639697551727, "learning_rate": 1.2968159537935312e-05, "loss": 0.4612, "step": 5495 }, { "epoch": 1.2259647557439215, "grad_norm": 0.16546767950057983, "learning_rate": 1.2965912084530693e-05, "loss": 0.4797, "step": 5496 }, { "epoch": 1.2261878206558108, "grad_norm": 0.16606946289539337, "learning_rate": 1.2963664466855207e-05, "loss": 0.4695, "step": 5497 }, { "epoch": 1.2264108855677003, "grad_norm": 0.16966325044631958, "learning_rate": 1.2961416685033339e-05, "loss": 0.4498, "step": 5498 }, { "epoch": 1.2266339504795896, "grad_norm": 0.17357441782951355, "learning_rate": 1.2959168739189587e-05, "loss": 0.4576, "step": 5499 }, { "epoch": 1.2268570153914788, "grad_norm": 0.16445501148700714, "learning_rate": 1.2956920629448458e-05, "loss": 0.4371, "step": 5500 }, { "epoch": 1.2270800803033683, "grad_norm": 0.16668792068958282, "learning_rate": 1.295467235593446e-05, "loss": 0.4698, "step": 5501 }, { "epoch": 1.2273031452152576, "grad_norm": 0.15939384698867798, "learning_rate": 1.2952423918772128e-05, "loss": 0.4261, "step": 5502 }, { "epoch": 1.227526210127147, "grad_norm": 0.1705656349658966, "learning_rate": 1.2950175318085983e-05, "loss": 0.441, "step": 5503 }, { "epoch": 1.2277492750390364, "grad_norm": 0.16186708211898804, "learning_rate": 1.2947926554000574e-05, "loss": 0.4795, "step": 5504 }, { "epoch": 1.2279723399509257, "grad_norm": 0.16865457594394684, "learning_rate": 1.2945677626640447e-05, "loss": 0.4653, "step": 5505 }, { "epoch": 1.2281954048628152, "grad_norm": 0.16898195445537567, "learning_rate": 1.2943428536130167e-05, "loss": 0.4422, "step": 5506 }, { "epoch": 1.2284184697747045, "grad_norm": 0.15895049273967743, "learning_rate": 1.2941179282594298e-05, "loss": 0.4435, "step": 5507 }, { "epoch": 1.2286415346865938, "grad_norm": 0.17526674270629883, "learning_rate": 1.293892986615742e-05, "loss": 0.4486, "step": 5508 }, { "epoch": 1.228864599598483, "grad_norm": 0.16472922265529633, "learning_rate": 1.2936680286944118e-05, "loss": 0.4414, "step": 5509 }, { "epoch": 1.2290876645103725, "grad_norm": 0.16342167556285858, "learning_rate": 1.2934430545078991e-05, "loss": 0.4708, "step": 5510 }, { "epoch": 1.2293107294222618, "grad_norm": 0.16461318731307983, "learning_rate": 1.2932180640686643e-05, "loss": 0.4545, "step": 5511 }, { "epoch": 1.2295337943341513, "grad_norm": 0.16927462816238403, "learning_rate": 1.2929930573891685e-05, "loss": 0.4573, "step": 5512 }, { "epoch": 1.2297568592460406, "grad_norm": 0.1817016750574112, "learning_rate": 1.2927680344818741e-05, "loss": 0.442, "step": 5513 }, { "epoch": 1.22997992415793, "grad_norm": 0.16440905630588531, "learning_rate": 1.2925429953592446e-05, "loss": 0.4534, "step": 5514 }, { "epoch": 1.2302029890698194, "grad_norm": 0.16801615059375763, "learning_rate": 1.2923179400337435e-05, "loss": 0.447, "step": 5515 }, { "epoch": 1.2304260539817087, "grad_norm": 0.16331814229488373, "learning_rate": 1.2920928685178365e-05, "loss": 0.4432, "step": 5516 }, { "epoch": 1.230649118893598, "grad_norm": 0.17522062361240387, "learning_rate": 1.291867780823989e-05, "loss": 0.497, "step": 5517 }, { "epoch": 1.2308721838054875, "grad_norm": 0.1681208312511444, "learning_rate": 1.2916426769646677e-05, "loss": 0.4688, "step": 5518 }, { "epoch": 1.2310952487173767, "grad_norm": 0.16323919594287872, "learning_rate": 1.2914175569523408e-05, "loss": 0.4562, "step": 5519 }, { "epoch": 1.231318313629266, "grad_norm": 0.17886684834957123, "learning_rate": 1.291192420799476e-05, "loss": 0.4333, "step": 5520 }, { "epoch": 1.2315413785411555, "grad_norm": 0.17314215004444122, "learning_rate": 1.2909672685185437e-05, "loss": 0.4517, "step": 5521 }, { "epoch": 1.2317644434530448, "grad_norm": 0.16686271131038666, "learning_rate": 1.2907421001220138e-05, "loss": 0.4842, "step": 5522 }, { "epoch": 1.2319875083649343, "grad_norm": 0.16551433503627777, "learning_rate": 1.2905169156223574e-05, "loss": 0.4726, "step": 5523 }, { "epoch": 1.2322105732768236, "grad_norm": 0.17935457825660706, "learning_rate": 1.2902917150320468e-05, "loss": 0.4438, "step": 5524 }, { "epoch": 1.2324336381887129, "grad_norm": 0.1675693243741989, "learning_rate": 1.2900664983635551e-05, "loss": 0.4471, "step": 5525 }, { "epoch": 1.2326567031006022, "grad_norm": 0.1640770435333252, "learning_rate": 1.289841265629356e-05, "loss": 0.4245, "step": 5526 }, { "epoch": 1.2328797680124917, "grad_norm": 0.17131870985031128, "learning_rate": 1.2896160168419245e-05, "loss": 0.4767, "step": 5527 }, { "epoch": 1.233102832924381, "grad_norm": 0.1778288334608078, "learning_rate": 1.2893907520137364e-05, "loss": 0.4754, "step": 5528 }, { "epoch": 1.2333258978362704, "grad_norm": 0.16755534708499908, "learning_rate": 1.2891654711572678e-05, "loss": 0.4649, "step": 5529 }, { "epoch": 1.2335489627481597, "grad_norm": 0.1766786277294159, "learning_rate": 1.2889401742849965e-05, "loss": 0.5005, "step": 5530 }, { "epoch": 1.233772027660049, "grad_norm": 0.16578464210033417, "learning_rate": 1.2887148614094012e-05, "loss": 0.4371, "step": 5531 }, { "epoch": 1.2339950925719385, "grad_norm": 0.17723850905895233, "learning_rate": 1.2884895325429605e-05, "loss": 0.4787, "step": 5532 }, { "epoch": 1.2342181574838278, "grad_norm": 0.171020045876503, "learning_rate": 1.288264187698155e-05, "loss": 0.4576, "step": 5533 }, { "epoch": 1.234441222395717, "grad_norm": 0.16916728019714355, "learning_rate": 1.2880388268874653e-05, "loss": 0.4739, "step": 5534 }, { "epoch": 1.2346642873076066, "grad_norm": 0.16481377184391022, "learning_rate": 1.2878134501233736e-05, "loss": 0.4456, "step": 5535 }, { "epoch": 1.2348873522194959, "grad_norm": 0.3519321382045746, "learning_rate": 1.2875880574183628e-05, "loss": 0.4754, "step": 5536 }, { "epoch": 1.2351104171313851, "grad_norm": 0.23968984186649323, "learning_rate": 1.2873626487849162e-05, "loss": 0.4661, "step": 5537 }, { "epoch": 1.2353334820432746, "grad_norm": 0.16674065589904785, "learning_rate": 1.2871372242355186e-05, "loss": 0.4611, "step": 5538 }, { "epoch": 1.235556546955164, "grad_norm": 0.17423000931739807, "learning_rate": 1.2869117837826553e-05, "loss": 0.4456, "step": 5539 }, { "epoch": 1.2357796118670534, "grad_norm": 0.16467906534671783, "learning_rate": 1.2866863274388128e-05, "loss": 0.459, "step": 5540 }, { "epoch": 1.2360026767789427, "grad_norm": 0.1630530208349228, "learning_rate": 1.2864608552164779e-05, "loss": 0.4274, "step": 5541 }, { "epoch": 1.236225741690832, "grad_norm": 0.1718243807554245, "learning_rate": 1.2862353671281392e-05, "loss": 0.4572, "step": 5542 }, { "epoch": 1.2364488066027213, "grad_norm": 0.1649494469165802, "learning_rate": 1.2860098631862852e-05, "loss": 0.4365, "step": 5543 }, { "epoch": 1.2366718715146108, "grad_norm": 0.16701866686344147, "learning_rate": 1.2857843434034058e-05, "loss": 0.4559, "step": 5544 }, { "epoch": 1.2368949364265, "grad_norm": 0.16802479326725006, "learning_rate": 1.2855588077919921e-05, "loss": 0.4636, "step": 5545 }, { "epoch": 1.2371180013383896, "grad_norm": 0.17907430231571198, "learning_rate": 1.2853332563645353e-05, "loss": 0.4747, "step": 5546 }, { "epoch": 1.2373410662502788, "grad_norm": 0.16412149369716644, "learning_rate": 1.2851076891335277e-05, "loss": 0.4492, "step": 5547 }, { "epoch": 1.2375641311621681, "grad_norm": 0.16632318496704102, "learning_rate": 1.2848821061114629e-05, "loss": 0.4628, "step": 5548 }, { "epoch": 1.2377871960740576, "grad_norm": 0.1785164624452591, "learning_rate": 1.2846565073108355e-05, "loss": 0.4821, "step": 5549 }, { "epoch": 1.238010260985947, "grad_norm": 0.1653890609741211, "learning_rate": 1.2844308927441397e-05, "loss": 0.435, "step": 5550 }, { "epoch": 1.2382333258978362, "grad_norm": 0.18326538801193237, "learning_rate": 1.2842052624238724e-05, "loss": 0.4691, "step": 5551 }, { "epoch": 1.2384563908097257, "grad_norm": 0.16218526661396027, "learning_rate": 1.2839796163625296e-05, "loss": 0.441, "step": 5552 }, { "epoch": 1.238679455721615, "grad_norm": 0.17150120437145233, "learning_rate": 1.2837539545726095e-05, "loss": 0.4533, "step": 5553 }, { "epoch": 1.2389025206335043, "grad_norm": 0.17477098107337952, "learning_rate": 1.2835282770666101e-05, "loss": 0.4898, "step": 5554 }, { "epoch": 1.2391255855453938, "grad_norm": 0.16403955221176147, "learning_rate": 1.2833025838570318e-05, "loss": 0.4591, "step": 5555 }, { "epoch": 1.239348650457283, "grad_norm": 0.17669036984443665, "learning_rate": 1.283076874956374e-05, "loss": 0.4727, "step": 5556 }, { "epoch": 1.2395717153691725, "grad_norm": 0.17886574566364288, "learning_rate": 1.2828511503771386e-05, "loss": 0.4824, "step": 5557 }, { "epoch": 1.2397947802810618, "grad_norm": 0.1615883857011795, "learning_rate": 1.282625410131827e-05, "loss": 0.4573, "step": 5558 }, { "epoch": 1.240017845192951, "grad_norm": 0.16335685551166534, "learning_rate": 1.2823996542329426e-05, "loss": 0.4624, "step": 5559 }, { "epoch": 1.2402409101048404, "grad_norm": 0.17322902381420135, "learning_rate": 1.2821738826929888e-05, "loss": 0.4607, "step": 5560 }, { "epoch": 1.24046397501673, "grad_norm": 0.17016318440437317, "learning_rate": 1.2819480955244705e-05, "loss": 0.4522, "step": 5561 }, { "epoch": 1.2406870399286192, "grad_norm": 0.16096848249435425, "learning_rate": 1.2817222927398932e-05, "loss": 0.4475, "step": 5562 }, { "epoch": 1.2409101048405087, "grad_norm": 0.166842982172966, "learning_rate": 1.281496474351763e-05, "loss": 0.4731, "step": 5563 }, { "epoch": 1.241133169752398, "grad_norm": 0.17023105919361115, "learning_rate": 1.2812706403725876e-05, "loss": 0.4626, "step": 5564 }, { "epoch": 1.2413562346642872, "grad_norm": 0.1609751284122467, "learning_rate": 1.2810447908148748e-05, "loss": 0.4751, "step": 5565 }, { "epoch": 1.2415792995761767, "grad_norm": 0.22108225524425507, "learning_rate": 1.2808189256911336e-05, "loss": 0.4497, "step": 5566 }, { "epoch": 1.241802364488066, "grad_norm": 0.1703910231590271, "learning_rate": 1.2805930450138742e-05, "loss": 0.4731, "step": 5567 }, { "epoch": 1.2420254293999553, "grad_norm": 0.16368292272090912, "learning_rate": 1.2803671487956063e-05, "loss": 0.459, "step": 5568 }, { "epoch": 1.2422484943118448, "grad_norm": 0.1646466702222824, "learning_rate": 1.2801412370488427e-05, "loss": 0.4756, "step": 5569 }, { "epoch": 1.242471559223734, "grad_norm": 0.16485071182250977, "learning_rate": 1.279915309786095e-05, "loss": 0.479, "step": 5570 }, { "epoch": 1.2426946241356234, "grad_norm": 0.17181314527988434, "learning_rate": 1.2796893670198767e-05, "loss": 0.4802, "step": 5571 }, { "epoch": 1.2429176890475129, "grad_norm": 0.17581303417682648, "learning_rate": 1.279463408762702e-05, "loss": 0.4747, "step": 5572 }, { "epoch": 1.2431407539594022, "grad_norm": 0.16636666655540466, "learning_rate": 1.2792374350270858e-05, "loss": 0.443, "step": 5573 }, { "epoch": 1.2433638188712917, "grad_norm": 0.1754438728094101, "learning_rate": 1.2790114458255441e-05, "loss": 0.4555, "step": 5574 }, { "epoch": 1.243586883783181, "grad_norm": 0.16133657097816467, "learning_rate": 1.2787854411705935e-05, "loss": 0.4426, "step": 5575 }, { "epoch": 1.2438099486950702, "grad_norm": 0.15894077718257904, "learning_rate": 1.2785594210747513e-05, "loss": 0.4511, "step": 5576 }, { "epoch": 1.2440330136069595, "grad_norm": 0.1614711582660675, "learning_rate": 1.2783333855505364e-05, "loss": 0.4712, "step": 5577 }, { "epoch": 1.244256078518849, "grad_norm": 0.16365355253219604, "learning_rate": 1.2781073346104677e-05, "loss": 0.4399, "step": 5578 }, { "epoch": 1.2444791434307383, "grad_norm": 0.16568869352340698, "learning_rate": 1.2778812682670654e-05, "loss": 0.4695, "step": 5579 }, { "epoch": 1.2447022083426278, "grad_norm": 0.16856862604618073, "learning_rate": 1.2776551865328503e-05, "loss": 0.4528, "step": 5580 }, { "epoch": 1.244925273254517, "grad_norm": 0.17370723187923431, "learning_rate": 1.277429089420345e-05, "loss": 0.4822, "step": 5581 }, { "epoch": 1.2451483381664064, "grad_norm": 0.16506825387477875, "learning_rate": 1.277202976942071e-05, "loss": 0.4354, "step": 5582 }, { "epoch": 1.2453714030782959, "grad_norm": 0.16328351199626923, "learning_rate": 1.276976849110553e-05, "loss": 0.4509, "step": 5583 }, { "epoch": 1.2455944679901851, "grad_norm": 0.16474324464797974, "learning_rate": 1.2767507059383144e-05, "loss": 0.4789, "step": 5584 }, { "epoch": 1.2458175329020744, "grad_norm": 0.16100145876407623, "learning_rate": 1.2765245474378814e-05, "loss": 0.442, "step": 5585 }, { "epoch": 1.246040597813964, "grad_norm": 0.1651872992515564, "learning_rate": 1.2762983736217792e-05, "loss": 0.4363, "step": 5586 }, { "epoch": 1.2462636627258532, "grad_norm": 0.1605336219072342, "learning_rate": 1.2760721845025353e-05, "loss": 0.4722, "step": 5587 }, { "epoch": 1.2464867276377425, "grad_norm": 0.16939808428287506, "learning_rate": 1.2758459800926768e-05, "loss": 0.4738, "step": 5588 }, { "epoch": 1.246709792549632, "grad_norm": 0.16064578294754028, "learning_rate": 1.2756197604047333e-05, "loss": 0.4487, "step": 5589 }, { "epoch": 1.2469328574615213, "grad_norm": 0.17054161429405212, "learning_rate": 1.2753935254512332e-05, "loss": 0.4863, "step": 5590 }, { "epoch": 1.2471559223734108, "grad_norm": 0.16378219425678253, "learning_rate": 1.2751672752447079e-05, "loss": 0.4373, "step": 5591 }, { "epoch": 1.2473789872853, "grad_norm": 0.16274695098400116, "learning_rate": 1.2749410097976878e-05, "loss": 0.4425, "step": 5592 }, { "epoch": 1.2476020521971893, "grad_norm": 0.16811704635620117, "learning_rate": 1.2747147291227053e-05, "loss": 0.4589, "step": 5593 }, { "epoch": 1.2478251171090786, "grad_norm": 0.1644984930753708, "learning_rate": 1.2744884332322926e-05, "loss": 0.4524, "step": 5594 }, { "epoch": 1.2480481820209681, "grad_norm": 0.17755252122879028, "learning_rate": 1.2742621221389846e-05, "loss": 0.4453, "step": 5595 }, { "epoch": 1.2482712469328574, "grad_norm": 0.17471030354499817, "learning_rate": 1.2740357958553144e-05, "loss": 0.5054, "step": 5596 }, { "epoch": 1.248494311844747, "grad_norm": 0.16155916452407837, "learning_rate": 1.2738094543938187e-05, "loss": 0.4663, "step": 5597 }, { "epoch": 1.2487173767566362, "grad_norm": 0.16668613255023956, "learning_rate": 1.2735830977670325e-05, "loss": 0.4664, "step": 5598 }, { "epoch": 1.2489404416685255, "grad_norm": 0.17190469801425934, "learning_rate": 1.2733567259874937e-05, "loss": 0.4645, "step": 5599 }, { "epoch": 1.249163506580415, "grad_norm": 0.17003987729549408, "learning_rate": 1.2731303390677399e-05, "loss": 0.4662, "step": 5600 }, { "epoch": 1.2493865714923043, "grad_norm": 0.175228089094162, "learning_rate": 1.2729039370203098e-05, "loss": 0.4609, "step": 5601 }, { "epoch": 1.2496096364041935, "grad_norm": 0.169545978307724, "learning_rate": 1.2726775198577432e-05, "loss": 0.4838, "step": 5602 }, { "epoch": 1.249832701316083, "grad_norm": 0.16817337274551392, "learning_rate": 1.2724510875925802e-05, "loss": 0.4786, "step": 5603 }, { "epoch": 1.2500557662279723, "grad_norm": 0.16374395787715912, "learning_rate": 1.2722246402373624e-05, "loss": 0.4848, "step": 5604 }, { "epoch": 1.2502788311398616, "grad_norm": 0.16421671211719513, "learning_rate": 1.2719981778046313e-05, "loss": 0.4712, "step": 5605 }, { "epoch": 1.250501896051751, "grad_norm": 0.16157956421375275, "learning_rate": 1.2717717003069305e-05, "loss": 0.4392, "step": 5606 }, { "epoch": 1.2507249609636404, "grad_norm": 0.1583690643310547, "learning_rate": 1.271545207756803e-05, "loss": 0.4425, "step": 5607 }, { "epoch": 1.25094802587553, "grad_norm": 0.18596141040325165, "learning_rate": 1.2713187001667943e-05, "loss": 0.4751, "step": 5608 }, { "epoch": 1.2511710907874192, "grad_norm": 0.19196903705596924, "learning_rate": 1.2710921775494494e-05, "loss": 0.4827, "step": 5609 }, { "epoch": 1.2513941556993085, "grad_norm": 0.16318334639072418, "learning_rate": 1.270865639917314e-05, "loss": 0.4548, "step": 5610 }, { "epoch": 1.2516172206111977, "grad_norm": 0.17583735287189484, "learning_rate": 1.270639087282936e-05, "loss": 0.4829, "step": 5611 }, { "epoch": 1.2518402855230872, "grad_norm": 0.19173632562160492, "learning_rate": 1.2704125196588628e-05, "loss": 0.4773, "step": 5612 }, { "epoch": 1.2520633504349765, "grad_norm": 0.15936903655529022, "learning_rate": 1.2701859370576432e-05, "loss": 0.428, "step": 5613 }, { "epoch": 1.252286415346866, "grad_norm": 0.16663479804992676, "learning_rate": 1.2699593394918273e-05, "loss": 0.4285, "step": 5614 }, { "epoch": 1.2525094802587553, "grad_norm": 0.16250313818454742, "learning_rate": 1.2697327269739646e-05, "loss": 0.471, "step": 5615 }, { "epoch": 1.2527325451706446, "grad_norm": 0.16274987161159515, "learning_rate": 1.2695060995166069e-05, "loss": 0.4566, "step": 5616 }, { "epoch": 1.252955610082534, "grad_norm": 0.17902643978595734, "learning_rate": 1.2692794571323064e-05, "loss": 0.4694, "step": 5617 }, { "epoch": 1.2531786749944234, "grad_norm": 0.16219644248485565, "learning_rate": 1.2690527998336153e-05, "loss": 0.4996, "step": 5618 }, { "epoch": 1.2534017399063129, "grad_norm": 0.16951128840446472, "learning_rate": 1.2688261276330882e-05, "loss": 0.4908, "step": 5619 }, { "epoch": 1.2536248048182022, "grad_norm": 0.1658773273229599, "learning_rate": 1.2685994405432788e-05, "loss": 0.4776, "step": 5620 }, { "epoch": 1.2538478697300914, "grad_norm": 0.15743979811668396, "learning_rate": 1.268372738576743e-05, "loss": 0.46, "step": 5621 }, { "epoch": 1.2540709346419807, "grad_norm": 0.16387997567653656, "learning_rate": 1.2681460217460365e-05, "loss": 0.4833, "step": 5622 }, { "epoch": 1.2542939995538702, "grad_norm": 0.16733065247535706, "learning_rate": 1.2679192900637172e-05, "loss": 0.4621, "step": 5623 }, { "epoch": 1.2545170644657595, "grad_norm": 0.16208809614181519, "learning_rate": 1.267692543542342e-05, "loss": 0.4688, "step": 5624 }, { "epoch": 1.254740129377649, "grad_norm": 0.16228267550468445, "learning_rate": 1.2674657821944699e-05, "loss": 0.4499, "step": 5625 }, { "epoch": 1.2549631942895383, "grad_norm": 0.17010217905044556, "learning_rate": 1.2672390060326603e-05, "loss": 0.4411, "step": 5626 }, { "epoch": 1.2551862592014276, "grad_norm": 0.1624649167060852, "learning_rate": 1.2670122150694737e-05, "loss": 0.451, "step": 5627 }, { "epoch": 1.2554093241133168, "grad_norm": 0.16551491618156433, "learning_rate": 1.2667854093174707e-05, "loss": 0.4502, "step": 5628 }, { "epoch": 1.2556323890252064, "grad_norm": 0.16004303097724915, "learning_rate": 1.266558588789214e-05, "loss": 0.4371, "step": 5629 }, { "epoch": 1.2558554539370956, "grad_norm": 0.1660148799419403, "learning_rate": 1.2663317534972656e-05, "loss": 0.4518, "step": 5630 }, { "epoch": 1.2560785188489851, "grad_norm": 0.16411980986595154, "learning_rate": 1.2661049034541897e-05, "loss": 0.4644, "step": 5631 }, { "epoch": 1.2563015837608744, "grad_norm": 0.16674858331680298, "learning_rate": 1.2658780386725503e-05, "loss": 0.4384, "step": 5632 }, { "epoch": 1.2565246486727637, "grad_norm": 0.17275111377239227, "learning_rate": 1.2656511591649125e-05, "loss": 0.4698, "step": 5633 }, { "epoch": 1.2567477135846532, "grad_norm": 0.16666240990161896, "learning_rate": 1.2654242649438426e-05, "loss": 0.4706, "step": 5634 }, { "epoch": 1.2569707784965425, "grad_norm": 0.1748340129852295, "learning_rate": 1.2651973560219073e-05, "loss": 0.4442, "step": 5635 }, { "epoch": 1.257193843408432, "grad_norm": 0.16753262281417847, "learning_rate": 1.2649704324116745e-05, "loss": 0.4586, "step": 5636 }, { "epoch": 1.2574169083203213, "grad_norm": 0.1677563339471817, "learning_rate": 1.264743494125712e-05, "loss": 0.4699, "step": 5637 }, { "epoch": 1.2576399732322106, "grad_norm": 0.16369731724262238, "learning_rate": 1.2645165411765899e-05, "loss": 0.4658, "step": 5638 }, { "epoch": 1.2578630381440998, "grad_norm": 0.18121913075447083, "learning_rate": 1.2642895735768775e-05, "loss": 0.453, "step": 5639 }, { "epoch": 1.2580861030559893, "grad_norm": 0.16915792226791382, "learning_rate": 1.2640625913391464e-05, "loss": 0.4496, "step": 5640 }, { "epoch": 1.2583091679678786, "grad_norm": 0.17231988906860352, "learning_rate": 1.2638355944759678e-05, "loss": 0.4594, "step": 5641 }, { "epoch": 1.2585322328797681, "grad_norm": 0.17405405640602112, "learning_rate": 1.2636085829999145e-05, "loss": 0.4665, "step": 5642 }, { "epoch": 1.2587552977916574, "grad_norm": 0.17699532210826874, "learning_rate": 1.2633815569235594e-05, "loss": 0.4856, "step": 5643 }, { "epoch": 1.2589783627035467, "grad_norm": 0.16303865611553192, "learning_rate": 1.2631545162594773e-05, "loss": 0.4475, "step": 5644 }, { "epoch": 1.259201427615436, "grad_norm": 0.40841978788375854, "learning_rate": 1.2629274610202427e-05, "loss": 0.4419, "step": 5645 }, { "epoch": 1.2594244925273255, "grad_norm": 0.17015258967876434, "learning_rate": 1.2627003912184315e-05, "loss": 0.4642, "step": 5646 }, { "epoch": 1.2596475574392147, "grad_norm": 0.16850386559963226, "learning_rate": 1.26247330686662e-05, "loss": 0.4751, "step": 5647 }, { "epoch": 1.2598706223511043, "grad_norm": 0.16403812170028687, "learning_rate": 1.2622462079773859e-05, "loss": 0.4555, "step": 5648 }, { "epoch": 1.2600936872629935, "grad_norm": 0.16475041210651398, "learning_rate": 1.2620190945633069e-05, "loss": 0.4456, "step": 5649 }, { "epoch": 1.2603167521748828, "grad_norm": 0.1695244163274765, "learning_rate": 1.2617919666369627e-05, "loss": 0.4489, "step": 5650 }, { "epoch": 1.2605398170867723, "grad_norm": 0.16356348991394043, "learning_rate": 1.2615648242109324e-05, "loss": 0.4545, "step": 5651 }, { "epoch": 1.2607628819986616, "grad_norm": 0.17045846581459045, "learning_rate": 1.2613376672977968e-05, "loss": 0.4734, "step": 5652 }, { "epoch": 1.260985946910551, "grad_norm": 0.15884579718112946, "learning_rate": 1.2611104959101374e-05, "loss": 0.4357, "step": 5653 }, { "epoch": 1.2612090118224404, "grad_norm": 0.16921289265155792, "learning_rate": 1.2608833100605361e-05, "loss": 0.4714, "step": 5654 }, { "epoch": 1.2614320767343297, "grad_norm": 0.16710084676742554, "learning_rate": 1.2606561097615764e-05, "loss": 0.4882, "step": 5655 }, { "epoch": 1.261655141646219, "grad_norm": 0.16774149239063263, "learning_rate": 1.2604288950258414e-05, "loss": 0.4501, "step": 5656 }, { "epoch": 1.2618782065581085, "grad_norm": 0.16943363845348358, "learning_rate": 1.2602016658659167e-05, "loss": 0.4764, "step": 5657 }, { "epoch": 1.2621012714699977, "grad_norm": 0.16601496934890747, "learning_rate": 1.2599744222943864e-05, "loss": 0.4549, "step": 5658 }, { "epoch": 1.2623243363818872, "grad_norm": 0.1612405627965927, "learning_rate": 1.2597471643238372e-05, "loss": 0.4451, "step": 5659 }, { "epoch": 1.2625474012937765, "grad_norm": 0.16680100560188293, "learning_rate": 1.2595198919668566e-05, "loss": 0.475, "step": 5660 }, { "epoch": 1.2627704662056658, "grad_norm": 0.16711308062076569, "learning_rate": 1.2592926052360316e-05, "loss": 0.4574, "step": 5661 }, { "epoch": 1.262993531117555, "grad_norm": 0.16317816078662872, "learning_rate": 1.259065304143951e-05, "loss": 0.4345, "step": 5662 }, { "epoch": 1.2632165960294446, "grad_norm": 0.16551434993743896, "learning_rate": 1.2588379887032048e-05, "loss": 0.4706, "step": 5663 }, { "epoch": 1.2634396609413339, "grad_norm": 0.16414450109004974, "learning_rate": 1.2586106589263823e-05, "loss": 0.4574, "step": 5664 }, { "epoch": 1.2636627258532234, "grad_norm": 0.17107480764389038, "learning_rate": 1.2583833148260749e-05, "loss": 0.4698, "step": 5665 }, { "epoch": 1.2638857907651126, "grad_norm": 0.17316211760044098, "learning_rate": 1.258155956414874e-05, "loss": 0.4772, "step": 5666 }, { "epoch": 1.264108855677002, "grad_norm": 0.16382986307144165, "learning_rate": 1.2579285837053722e-05, "loss": 0.4463, "step": 5667 }, { "epoch": 1.2643319205888914, "grad_norm": 0.20516705513000488, "learning_rate": 1.2577011967101636e-05, "loss": 0.4539, "step": 5668 }, { "epoch": 1.2645549855007807, "grad_norm": 0.17769816517829895, "learning_rate": 1.2574737954418412e-05, "loss": 0.4889, "step": 5669 }, { "epoch": 1.2647780504126702, "grad_norm": 0.16899065673351288, "learning_rate": 1.2572463799130008e-05, "loss": 0.476, "step": 5670 }, { "epoch": 1.2650011153245595, "grad_norm": 0.16549617052078247, "learning_rate": 1.2570189501362375e-05, "loss": 0.4493, "step": 5671 }, { "epoch": 1.2652241802364488, "grad_norm": 0.16648399829864502, "learning_rate": 1.2567915061241483e-05, "loss": 0.4375, "step": 5672 }, { "epoch": 1.265447245148338, "grad_norm": 0.16941656172275543, "learning_rate": 1.2565640478893299e-05, "loss": 0.4543, "step": 5673 }, { "epoch": 1.2656703100602276, "grad_norm": 0.1804044395685196, "learning_rate": 1.2563365754443808e-05, "loss": 0.4722, "step": 5674 }, { "epoch": 1.2658933749721168, "grad_norm": 0.18117646872997284, "learning_rate": 1.2561090888018996e-05, "loss": 0.4869, "step": 5675 }, { "epoch": 1.2661164398840064, "grad_norm": 0.16993004083633423, "learning_rate": 1.2558815879744865e-05, "loss": 0.4665, "step": 5676 }, { "epoch": 1.2663395047958956, "grad_norm": 0.19133198261260986, "learning_rate": 1.255654072974741e-05, "loss": 0.4619, "step": 5677 }, { "epoch": 1.266562569707785, "grad_norm": 0.17450137436389923, "learning_rate": 1.2554265438152653e-05, "loss": 0.4638, "step": 5678 }, { "epoch": 1.2667856346196742, "grad_norm": 0.1640671044588089, "learning_rate": 1.2551990005086604e-05, "loss": 0.4364, "step": 5679 }, { "epoch": 1.2670086995315637, "grad_norm": 0.16710147261619568, "learning_rate": 1.2549714430675299e-05, "loss": 0.4479, "step": 5680 }, { "epoch": 1.267231764443453, "grad_norm": 0.17442373931407928, "learning_rate": 1.2547438715044769e-05, "loss": 0.4517, "step": 5681 }, { "epoch": 1.2674548293553425, "grad_norm": 0.18162380158901215, "learning_rate": 1.254516285832106e-05, "loss": 0.4829, "step": 5682 }, { "epoch": 1.2676778942672318, "grad_norm": 0.17129097878932953, "learning_rate": 1.2542886860630221e-05, "loss": 0.4519, "step": 5683 }, { "epoch": 1.267900959179121, "grad_norm": 0.1645333468914032, "learning_rate": 1.2540610722098314e-05, "loss": 0.4544, "step": 5684 }, { "epoch": 1.2681240240910105, "grad_norm": 0.21202479302883148, "learning_rate": 1.2538334442851403e-05, "loss": 0.5017, "step": 5685 }, { "epoch": 1.2683470890028998, "grad_norm": 0.21118725836277008, "learning_rate": 1.2536058023015563e-05, "loss": 0.477, "step": 5686 }, { "epoch": 1.2685701539147893, "grad_norm": 0.16621573269367218, "learning_rate": 1.2533781462716879e-05, "loss": 0.4427, "step": 5687 }, { "epoch": 1.2687932188266786, "grad_norm": 0.20093737542629242, "learning_rate": 1.2531504762081437e-05, "loss": 0.4873, "step": 5688 }, { "epoch": 1.269016283738568, "grad_norm": 0.1732548326253891, "learning_rate": 1.2529227921235342e-05, "loss": 0.4745, "step": 5689 }, { "epoch": 1.2692393486504572, "grad_norm": 0.1718747317790985, "learning_rate": 1.252695094030469e-05, "loss": 0.4478, "step": 5690 }, { "epoch": 1.2694624135623467, "grad_norm": 0.1892472505569458, "learning_rate": 1.2524673819415602e-05, "loss": 0.457, "step": 5691 }, { "epoch": 1.269685478474236, "grad_norm": 0.16968894004821777, "learning_rate": 1.2522396558694197e-05, "loss": 0.4734, "step": 5692 }, { "epoch": 1.2699085433861255, "grad_norm": 0.1685362160205841, "learning_rate": 1.2520119158266606e-05, "loss": 0.4233, "step": 5693 }, { "epoch": 1.2701316082980147, "grad_norm": 0.16388767957687378, "learning_rate": 1.2517841618258961e-05, "loss": 0.4135, "step": 5694 }, { "epoch": 1.270354673209904, "grad_norm": 0.16109982132911682, "learning_rate": 1.251556393879741e-05, "loss": 0.4484, "step": 5695 }, { "epoch": 1.2705777381217933, "grad_norm": 0.18354761600494385, "learning_rate": 1.2513286120008105e-05, "loss": 0.4358, "step": 5696 }, { "epoch": 1.2708008030336828, "grad_norm": 0.17148545384407043, "learning_rate": 1.2511008162017209e-05, "loss": 0.4646, "step": 5697 }, { "epoch": 1.271023867945572, "grad_norm": 0.17020155489444733, "learning_rate": 1.2508730064950881e-05, "loss": 0.4587, "step": 5698 }, { "epoch": 1.2712469328574616, "grad_norm": 0.16151095926761627, "learning_rate": 1.2506451828935303e-05, "loss": 0.4886, "step": 5699 }, { "epoch": 1.2714699977693509, "grad_norm": 0.1669510155916214, "learning_rate": 1.2504173454096658e-05, "loss": 0.4432, "step": 5700 }, { "epoch": 1.2716930626812402, "grad_norm": 0.16603168845176697, "learning_rate": 1.2501894940561133e-05, "loss": 0.4556, "step": 5701 }, { "epoch": 1.2719161275931297, "grad_norm": 0.1668505221605301, "learning_rate": 1.249961628845493e-05, "loss": 0.477, "step": 5702 }, { "epoch": 1.272139192505019, "grad_norm": 0.1614041030406952, "learning_rate": 1.2497337497904251e-05, "loss": 0.4273, "step": 5703 }, { "epoch": 1.2723622574169084, "grad_norm": 0.1707865595817566, "learning_rate": 1.2495058569035316e-05, "loss": 0.4562, "step": 5704 }, { "epoch": 1.2725853223287977, "grad_norm": 0.17685820162296295, "learning_rate": 1.249277950197434e-05, "loss": 0.4693, "step": 5705 }, { "epoch": 1.272808387240687, "grad_norm": 0.16811825335025787, "learning_rate": 1.2490500296847558e-05, "loss": 0.4576, "step": 5706 }, { "epoch": 1.2730314521525763, "grad_norm": 0.17165428400039673, "learning_rate": 1.2488220953781201e-05, "loss": 0.4851, "step": 5707 }, { "epoch": 1.2732545170644658, "grad_norm": 0.1754395067691803, "learning_rate": 1.2485941472901519e-05, "loss": 0.4895, "step": 5708 }, { "epoch": 1.273477581976355, "grad_norm": 0.1676994264125824, "learning_rate": 1.2483661854334756e-05, "loss": 0.4675, "step": 5709 }, { "epoch": 1.2737006468882446, "grad_norm": 0.16332073509693146, "learning_rate": 1.2481382098207181e-05, "loss": 0.4444, "step": 5710 }, { "epoch": 1.2739237118001339, "grad_norm": 0.175141841173172, "learning_rate": 1.2479102204645057e-05, "loss": 0.4934, "step": 5711 }, { "epoch": 1.2741467767120231, "grad_norm": 0.17090824246406555, "learning_rate": 1.247682217377466e-05, "loss": 0.4588, "step": 5712 }, { "epoch": 1.2743698416239124, "grad_norm": 0.1759280413389206, "learning_rate": 1.2474542005722265e-05, "loss": 0.4487, "step": 5713 }, { "epoch": 1.274592906535802, "grad_norm": 0.16140590608119965, "learning_rate": 1.2472261700614174e-05, "loss": 0.451, "step": 5714 }, { "epoch": 1.2748159714476912, "grad_norm": 0.19568322598934174, "learning_rate": 1.2469981258576676e-05, "loss": 0.4726, "step": 5715 }, { "epoch": 1.2750390363595807, "grad_norm": 0.16794992983341217, "learning_rate": 1.246770067973608e-05, "loss": 0.465, "step": 5716 }, { "epoch": 1.27526210127147, "grad_norm": 0.17148984968662262, "learning_rate": 1.2465419964218696e-05, "loss": 0.4809, "step": 5717 }, { "epoch": 1.2754851661833593, "grad_norm": 0.17300093173980713, "learning_rate": 1.2463139112150851e-05, "loss": 0.4417, "step": 5718 }, { "epoch": 1.2757082310952488, "grad_norm": 0.16525490581989288, "learning_rate": 1.2460858123658863e-05, "loss": 0.4744, "step": 5719 }, { "epoch": 1.275931296007138, "grad_norm": 0.16058532893657684, "learning_rate": 1.2458576998869076e-05, "loss": 0.4494, "step": 5720 }, { "epoch": 1.2761543609190276, "grad_norm": 0.16562692821025848, "learning_rate": 1.2456295737907828e-05, "loss": 0.3971, "step": 5721 }, { "epoch": 1.2763774258309168, "grad_norm": 0.1694319099187851, "learning_rate": 1.2454014340901472e-05, "loss": 0.4956, "step": 5722 }, { "epoch": 1.2766004907428061, "grad_norm": 0.1706772893667221, "learning_rate": 1.2451732807976367e-05, "loss": 0.487, "step": 5723 }, { "epoch": 1.2768235556546954, "grad_norm": 0.16685868799686432, "learning_rate": 1.2449451139258875e-05, "loss": 0.4671, "step": 5724 }, { "epoch": 1.277046620566585, "grad_norm": 0.16977933049201965, "learning_rate": 1.2447169334875374e-05, "loss": 0.4633, "step": 5725 }, { "epoch": 1.2772696854784742, "grad_norm": 0.1995961219072342, "learning_rate": 1.2444887394952237e-05, "loss": 0.4676, "step": 5726 }, { "epoch": 1.2774927503903637, "grad_norm": 0.17159555852413177, "learning_rate": 1.2442605319615862e-05, "loss": 0.4884, "step": 5727 }, { "epoch": 1.277715815302253, "grad_norm": 0.18190377950668335, "learning_rate": 1.2440323108992635e-05, "loss": 0.4802, "step": 5728 }, { "epoch": 1.2779388802141423, "grad_norm": 0.16413584351539612, "learning_rate": 1.2438040763208967e-05, "loss": 0.4475, "step": 5729 }, { "epoch": 1.2781619451260315, "grad_norm": 0.17521817982196808, "learning_rate": 1.2435758282391266e-05, "loss": 0.4559, "step": 5730 }, { "epoch": 1.278385010037921, "grad_norm": 0.17562466859817505, "learning_rate": 1.243347566666595e-05, "loss": 0.4614, "step": 5731 }, { "epoch": 1.2786080749498103, "grad_norm": 0.18439187109470367, "learning_rate": 1.2431192916159442e-05, "loss": 0.4739, "step": 5732 }, { "epoch": 1.2788311398616998, "grad_norm": 0.17046132683753967, "learning_rate": 1.242891003099818e-05, "loss": 0.4653, "step": 5733 }, { "epoch": 1.279054204773589, "grad_norm": 0.17075121402740479, "learning_rate": 1.24266270113086e-05, "loss": 0.5025, "step": 5734 }, { "epoch": 1.2792772696854784, "grad_norm": 0.1661718189716339, "learning_rate": 1.2424343857217153e-05, "loss": 0.4574, "step": 5735 }, { "epoch": 1.279500334597368, "grad_norm": 0.1734541356563568, "learning_rate": 1.2422060568850293e-05, "loss": 0.4579, "step": 5736 }, { "epoch": 1.2797233995092572, "grad_norm": 0.16650691628456116, "learning_rate": 1.2419777146334486e-05, "loss": 0.4793, "step": 5737 }, { "epoch": 1.2799464644211467, "grad_norm": 0.16996720433235168, "learning_rate": 1.2417493589796199e-05, "loss": 0.4679, "step": 5738 }, { "epoch": 1.280169529333036, "grad_norm": 0.1680038571357727, "learning_rate": 1.2415209899361908e-05, "loss": 0.4842, "step": 5739 }, { "epoch": 1.2803925942449252, "grad_norm": 0.21281017363071442, "learning_rate": 1.2412926075158103e-05, "loss": 0.4534, "step": 5740 }, { "epoch": 1.2806156591568145, "grad_norm": 0.16885453462600708, "learning_rate": 1.241064211731127e-05, "loss": 0.4758, "step": 5741 }, { "epoch": 1.280838724068704, "grad_norm": 0.19396549463272095, "learning_rate": 1.2408358025947917e-05, "loss": 0.4729, "step": 5742 }, { "epoch": 1.2810617889805933, "grad_norm": 0.1660171002149582, "learning_rate": 1.2406073801194546e-05, "loss": 0.4574, "step": 5743 }, { "epoch": 1.2812848538924828, "grad_norm": 0.17650169134140015, "learning_rate": 1.2403789443177672e-05, "loss": 0.452, "step": 5744 }, { "epoch": 1.281507918804372, "grad_norm": 0.16995863616466522, "learning_rate": 1.240150495202382e-05, "loss": 0.4556, "step": 5745 }, { "epoch": 1.2817309837162614, "grad_norm": 0.17916269600391388, "learning_rate": 1.2399220327859516e-05, "loss": 0.4595, "step": 5746 }, { "epoch": 1.2819540486281507, "grad_norm": 0.16572338342666626, "learning_rate": 1.2396935570811299e-05, "loss": 0.4408, "step": 5747 }, { "epoch": 1.2821771135400402, "grad_norm": 0.1570359319448471, "learning_rate": 1.2394650681005713e-05, "loss": 0.4357, "step": 5748 }, { "epoch": 1.2824001784519294, "grad_norm": 0.1629701554775238, "learning_rate": 1.239236565856931e-05, "loss": 0.4481, "step": 5749 }, { "epoch": 1.282623243363819, "grad_norm": 0.17372570931911469, "learning_rate": 1.2390080503628647e-05, "loss": 0.4717, "step": 5750 }, { "epoch": 1.2828463082757082, "grad_norm": 0.1692957580089569, "learning_rate": 1.2387795216310292e-05, "loss": 0.4665, "step": 5751 }, { "epoch": 1.2830693731875975, "grad_norm": 0.16883119940757751, "learning_rate": 1.2385509796740818e-05, "loss": 0.4566, "step": 5752 }, { "epoch": 1.283292438099487, "grad_norm": 0.1655278503894806, "learning_rate": 1.2383224245046805e-05, "loss": 0.4482, "step": 5753 }, { "epoch": 1.2835155030113763, "grad_norm": 0.17099110782146454, "learning_rate": 1.2380938561354846e-05, "loss": 0.4733, "step": 5754 }, { "epoch": 1.2837385679232658, "grad_norm": 0.16379962861537933, "learning_rate": 1.2378652745791528e-05, "loss": 0.4337, "step": 5755 }, { "epoch": 1.283961632835155, "grad_norm": 0.1781388372182846, "learning_rate": 1.237636679848346e-05, "loss": 0.4642, "step": 5756 }, { "epoch": 1.2841846977470444, "grad_norm": 0.16018837690353394, "learning_rate": 1.2374080719557253e-05, "loss": 0.4488, "step": 5757 }, { "epoch": 1.2844077626589336, "grad_norm": 0.17385733127593994, "learning_rate": 1.237179450913952e-05, "loss": 0.4448, "step": 5758 }, { "epoch": 1.2846308275708231, "grad_norm": 0.1600496470928192, "learning_rate": 1.236950816735689e-05, "loss": 0.4523, "step": 5759 }, { "epoch": 1.2848538924827124, "grad_norm": 0.17114561796188354, "learning_rate": 1.2367221694335992e-05, "loss": 0.4717, "step": 5760 }, { "epoch": 1.285076957394602, "grad_norm": 0.17317497730255127, "learning_rate": 1.2364935090203464e-05, "loss": 0.4571, "step": 5761 }, { "epoch": 1.2853000223064912, "grad_norm": 0.18465475738048553, "learning_rate": 1.2362648355085958e-05, "loss": 0.4622, "step": 5762 }, { "epoch": 1.2855230872183805, "grad_norm": 0.16968859732151031, "learning_rate": 1.2360361489110123e-05, "loss": 0.445, "step": 5763 }, { "epoch": 1.2857461521302698, "grad_norm": 0.16804584860801697, "learning_rate": 1.235807449240262e-05, "loss": 0.4482, "step": 5764 }, { "epoch": 1.2859692170421593, "grad_norm": 0.170896977186203, "learning_rate": 1.2355787365090122e-05, "loss": 0.4517, "step": 5765 }, { "epoch": 1.2861922819540486, "grad_norm": 0.1710241287946701, "learning_rate": 1.2353500107299299e-05, "loss": 0.4884, "step": 5766 }, { "epoch": 1.286415346865938, "grad_norm": 0.16201786696910858, "learning_rate": 1.2351212719156835e-05, "loss": 0.4542, "step": 5767 }, { "epoch": 1.2866384117778273, "grad_norm": 0.16651326417922974, "learning_rate": 1.234892520078942e-05, "loss": 0.4649, "step": 5768 }, { "epoch": 1.2868614766897166, "grad_norm": 0.16634796559810638, "learning_rate": 1.2346637552323757e-05, "loss": 0.4793, "step": 5769 }, { "epoch": 1.2870845416016061, "grad_norm": 0.16899818181991577, "learning_rate": 1.2344349773886542e-05, "loss": 0.4633, "step": 5770 }, { "epoch": 1.2873076065134954, "grad_norm": 0.17024937272071838, "learning_rate": 1.2342061865604492e-05, "loss": 0.4471, "step": 5771 }, { "epoch": 1.287530671425385, "grad_norm": 0.1707090586423874, "learning_rate": 1.2339773827604322e-05, "loss": 0.4758, "step": 5772 }, { "epoch": 1.2877537363372742, "grad_norm": 0.17313364148139954, "learning_rate": 1.2337485660012757e-05, "loss": 0.4858, "step": 5773 }, { "epoch": 1.2879768012491635, "grad_norm": 0.1700849086046219, "learning_rate": 1.2335197362956537e-05, "loss": 0.4505, "step": 5774 }, { "epoch": 1.2881998661610528, "grad_norm": 0.17922791838645935, "learning_rate": 1.2332908936562395e-05, "loss": 0.4559, "step": 5775 }, { "epoch": 1.2884229310729423, "grad_norm": 0.1638413369655609, "learning_rate": 1.2330620380957086e-05, "loss": 0.4526, "step": 5776 }, { "epoch": 1.2886459959848315, "grad_norm": 0.15971173346042633, "learning_rate": 1.2328331696267357e-05, "loss": 0.4386, "step": 5777 }, { "epoch": 1.288869060896721, "grad_norm": 0.17769648134708405, "learning_rate": 1.2326042882619973e-05, "loss": 0.4659, "step": 5778 }, { "epoch": 1.2890921258086103, "grad_norm": 0.16623780131340027, "learning_rate": 1.2323753940141704e-05, "loss": 0.4593, "step": 5779 }, { "epoch": 1.2893151907204996, "grad_norm": 0.16500690579414368, "learning_rate": 1.2321464868959326e-05, "loss": 0.45, "step": 5780 }, { "epoch": 1.289538255632389, "grad_norm": 0.17217601835727692, "learning_rate": 1.2319175669199619e-05, "loss": 0.4747, "step": 5781 }, { "epoch": 1.2897613205442784, "grad_norm": 0.16614340245723724, "learning_rate": 1.2316886340989375e-05, "loss": 0.4685, "step": 5782 }, { "epoch": 1.2899843854561677, "grad_norm": 0.1841314285993576, "learning_rate": 1.231459688445539e-05, "loss": 0.4537, "step": 5783 }, { "epoch": 1.2902074503680572, "grad_norm": 0.1670687049627304, "learning_rate": 1.231230729972447e-05, "loss": 0.4763, "step": 5784 }, { "epoch": 1.2904305152799465, "grad_norm": 0.1700257658958435, "learning_rate": 1.2310017586923431e-05, "loss": 0.4698, "step": 5785 }, { "epoch": 1.2906535801918357, "grad_norm": 0.17322078347206116, "learning_rate": 1.2307727746179085e-05, "loss": 0.4422, "step": 5786 }, { "epoch": 1.2908766451037252, "grad_norm": 0.18490198254585266, "learning_rate": 1.230543777761826e-05, "loss": 0.451, "step": 5787 }, { "epoch": 1.2910997100156145, "grad_norm": 0.17593975365161896, "learning_rate": 1.2303147681367788e-05, "loss": 0.4806, "step": 5788 }, { "epoch": 1.291322774927504, "grad_norm": 0.16735313832759857, "learning_rate": 1.2300857457554513e-05, "loss": 0.4551, "step": 5789 }, { "epoch": 1.2915458398393933, "grad_norm": 0.18818075954914093, "learning_rate": 1.2298567106305277e-05, "loss": 0.4722, "step": 5790 }, { "epoch": 1.2917689047512826, "grad_norm": 0.33603474497795105, "learning_rate": 1.2296276627746938e-05, "loss": 0.4789, "step": 5791 }, { "epoch": 1.2919919696631719, "grad_norm": 0.16247035562992096, "learning_rate": 1.2293986022006353e-05, "loss": 0.4493, "step": 5792 }, { "epoch": 1.2922150345750614, "grad_norm": 0.17141158878803253, "learning_rate": 1.2291695289210395e-05, "loss": 0.4718, "step": 5793 }, { "epoch": 1.2924380994869507, "grad_norm": 0.15925908088684082, "learning_rate": 1.2289404429485932e-05, "loss": 0.4805, "step": 5794 }, { "epoch": 1.2926611643988402, "grad_norm": 0.2104969471693039, "learning_rate": 1.2287113442959854e-05, "loss": 0.4718, "step": 5795 }, { "epoch": 1.2928842293107294, "grad_norm": 0.1778918355703354, "learning_rate": 1.2284822329759047e-05, "loss": 0.4708, "step": 5796 }, { "epoch": 1.2931072942226187, "grad_norm": 0.1711834967136383, "learning_rate": 1.2282531090010408e-05, "loss": 0.4613, "step": 5797 }, { "epoch": 1.2933303591345082, "grad_norm": 0.17444384098052979, "learning_rate": 1.2280239723840836e-05, "loss": 0.4477, "step": 5798 }, { "epoch": 1.2935534240463975, "grad_norm": 0.16771963238716125, "learning_rate": 1.2277948231377247e-05, "loss": 0.4698, "step": 5799 }, { "epoch": 1.2937764889582868, "grad_norm": 0.1844215989112854, "learning_rate": 1.2275656612746556e-05, "loss": 0.4681, "step": 5800 }, { "epoch": 1.2939995538701763, "grad_norm": 0.1627340018749237, "learning_rate": 1.227336486807569e-05, "loss": 0.4758, "step": 5801 }, { "epoch": 1.2942226187820656, "grad_norm": 0.17234186828136444, "learning_rate": 1.2271072997491573e-05, "loss": 0.4646, "step": 5802 }, { "epoch": 1.2944456836939549, "grad_norm": 0.169833242893219, "learning_rate": 1.2268781001121151e-05, "loss": 0.4709, "step": 5803 }, { "epoch": 1.2946687486058444, "grad_norm": 0.16857881844043732, "learning_rate": 1.2266488879091365e-05, "loss": 0.484, "step": 5804 }, { "epoch": 1.2948918135177336, "grad_norm": 0.16517959535121918, "learning_rate": 1.2264196631529166e-05, "loss": 0.4608, "step": 5805 }, { "epoch": 1.2951148784296231, "grad_norm": 0.16706417500972748, "learning_rate": 1.226190425856152e-05, "loss": 0.4541, "step": 5806 }, { "epoch": 1.2953379433415124, "grad_norm": 0.16569514572620392, "learning_rate": 1.2259611760315381e-05, "loss": 0.4393, "step": 5807 }, { "epoch": 1.2955610082534017, "grad_norm": 0.17674359679222107, "learning_rate": 1.2257319136917735e-05, "loss": 0.4869, "step": 5808 }, { "epoch": 1.295784073165291, "grad_norm": 0.1684064269065857, "learning_rate": 1.2255026388495554e-05, "loss": 0.488, "step": 5809 }, { "epoch": 1.2960071380771805, "grad_norm": 0.16627109050750732, "learning_rate": 1.2252733515175829e-05, "loss": 0.4577, "step": 5810 }, { "epoch": 1.2962302029890698, "grad_norm": 0.18663783371448517, "learning_rate": 1.2250440517085549e-05, "loss": 0.4512, "step": 5811 }, { "epoch": 1.2964532679009593, "grad_norm": 0.17597997188568115, "learning_rate": 1.2248147394351719e-05, "loss": 0.4482, "step": 5812 }, { "epoch": 1.2966763328128486, "grad_norm": 0.1624067723751068, "learning_rate": 1.2245854147101344e-05, "loss": 0.4809, "step": 5813 }, { "epoch": 1.2968993977247378, "grad_norm": 0.1702115684747696, "learning_rate": 1.2243560775461441e-05, "loss": 0.4572, "step": 5814 }, { "epoch": 1.2971224626366273, "grad_norm": 0.17795614898204803, "learning_rate": 1.2241267279559029e-05, "loss": 0.4668, "step": 5815 }, { "epoch": 1.2973455275485166, "grad_norm": 0.17128905653953552, "learning_rate": 1.2238973659521136e-05, "loss": 0.4649, "step": 5816 }, { "epoch": 1.297568592460406, "grad_norm": 0.1639898121356964, "learning_rate": 1.2236679915474799e-05, "loss": 0.4741, "step": 5817 }, { "epoch": 1.2977916573722954, "grad_norm": 0.17181125283241272, "learning_rate": 1.2234386047547057e-05, "loss": 0.4569, "step": 5818 }, { "epoch": 1.2980147222841847, "grad_norm": 0.17386600375175476, "learning_rate": 1.2232092055864961e-05, "loss": 0.4536, "step": 5819 }, { "epoch": 1.298237787196074, "grad_norm": 0.1769663542509079, "learning_rate": 1.222979794055557e-05, "loss": 0.461, "step": 5820 }, { "epoch": 1.2984608521079635, "grad_norm": 0.17835234105587006, "learning_rate": 1.2227503701745942e-05, "loss": 0.4535, "step": 5821 }, { "epoch": 1.2986839170198528, "grad_norm": 0.1780080497264862, "learning_rate": 1.2225209339563144e-05, "loss": 0.4532, "step": 5822 }, { "epoch": 1.2989069819317423, "grad_norm": 0.1722138375043869, "learning_rate": 1.2222914854134261e-05, "loss": 0.4626, "step": 5823 }, { "epoch": 1.2991300468436315, "grad_norm": 0.1659894585609436, "learning_rate": 1.2220620245586365e-05, "loss": 0.4466, "step": 5824 }, { "epoch": 1.2993531117555208, "grad_norm": 0.17937776446342468, "learning_rate": 1.2218325514046557e-05, "loss": 0.4572, "step": 5825 }, { "epoch": 1.29957617666741, "grad_norm": 0.18704891204833984, "learning_rate": 1.2216030659641924e-05, "loss": 0.4745, "step": 5826 }, { "epoch": 1.2997992415792996, "grad_norm": 0.17715615034103394, "learning_rate": 1.2213735682499578e-05, "loss": 0.4536, "step": 5827 }, { "epoch": 1.3000223064911889, "grad_norm": 0.1589982509613037, "learning_rate": 1.2211440582746619e-05, "loss": 0.4619, "step": 5828 }, { "epoch": 1.3002453714030784, "grad_norm": 0.1834399253129959, "learning_rate": 1.2209145360510175e-05, "loss": 0.457, "step": 5829 }, { "epoch": 1.3004684363149677, "grad_norm": 0.16908270120620728, "learning_rate": 1.2206850015917362e-05, "loss": 0.4432, "step": 5830 }, { "epoch": 1.300691501226857, "grad_norm": 0.16872212290763855, "learning_rate": 1.2204554549095316e-05, "loss": 0.4378, "step": 5831 }, { "epoch": 1.3009145661387465, "grad_norm": 0.1816396862268448, "learning_rate": 1.2202258960171167e-05, "loss": 0.4645, "step": 5832 }, { "epoch": 1.3011376310506357, "grad_norm": 0.17055541276931763, "learning_rate": 1.219996324927207e-05, "loss": 0.4567, "step": 5833 }, { "epoch": 1.301360695962525, "grad_norm": 0.17526254057884216, "learning_rate": 1.2197667416525165e-05, "loss": 0.4335, "step": 5834 }, { "epoch": 1.3015837608744145, "grad_norm": 0.17466293275356293, "learning_rate": 1.2195371462057619e-05, "loss": 0.4645, "step": 5835 }, { "epoch": 1.3018068257863038, "grad_norm": 0.1705324649810791, "learning_rate": 1.2193075385996589e-05, "loss": 0.4782, "step": 5836 }, { "epoch": 1.302029890698193, "grad_norm": 0.1765318214893341, "learning_rate": 1.2190779188469248e-05, "loss": 0.4651, "step": 5837 }, { "epoch": 1.3022529556100826, "grad_norm": 0.18065306544303894, "learning_rate": 1.2188482869602778e-05, "loss": 0.4507, "step": 5838 }, { "epoch": 1.3024760205219719, "grad_norm": 0.17382916808128357, "learning_rate": 1.2186186429524358e-05, "loss": 0.4621, "step": 5839 }, { "epoch": 1.3026990854338614, "grad_norm": 0.17227432131767273, "learning_rate": 1.2183889868361185e-05, "loss": 0.4493, "step": 5840 }, { "epoch": 1.3029221503457507, "grad_norm": 0.16566970944404602, "learning_rate": 1.218159318624045e-05, "loss": 0.4613, "step": 5841 }, { "epoch": 1.30314521525764, "grad_norm": 0.16630716621875763, "learning_rate": 1.2179296383289366e-05, "loss": 0.4703, "step": 5842 }, { "epoch": 1.3033682801695292, "grad_norm": 0.16445283591747284, "learning_rate": 1.2176999459635137e-05, "loss": 0.471, "step": 5843 }, { "epoch": 1.3035913450814187, "grad_norm": 0.1610761433839798, "learning_rate": 1.2174702415404987e-05, "loss": 0.4669, "step": 5844 }, { "epoch": 1.303814409993308, "grad_norm": 0.16646409034729004, "learning_rate": 1.2172405250726134e-05, "loss": 0.4691, "step": 5845 }, { "epoch": 1.3040374749051975, "grad_norm": 0.173641636967659, "learning_rate": 1.2170107965725815e-05, "loss": 0.4525, "step": 5846 }, { "epoch": 1.3042605398170868, "grad_norm": 0.16962803900241852, "learning_rate": 1.2167810560531266e-05, "loss": 0.4491, "step": 5847 }, { "epoch": 1.304483604728976, "grad_norm": 0.1649630218744278, "learning_rate": 1.2165513035269733e-05, "loss": 0.4409, "step": 5848 }, { "epoch": 1.3047066696408656, "grad_norm": 0.1694600135087967, "learning_rate": 1.2163215390068466e-05, "loss": 0.47, "step": 5849 }, { "epoch": 1.3049297345527548, "grad_norm": 0.1770717054605484, "learning_rate": 1.2160917625054721e-05, "loss": 0.4647, "step": 5850 }, { "epoch": 1.3051527994646441, "grad_norm": 0.15664999186992645, "learning_rate": 1.2158619740355767e-05, "loss": 0.4371, "step": 5851 }, { "epoch": 1.3053758643765336, "grad_norm": 0.16437092423439026, "learning_rate": 1.2156321736098877e-05, "loss": 0.436, "step": 5852 }, { "epoch": 1.305598929288423, "grad_norm": 0.17147205770015717, "learning_rate": 1.2154023612411321e-05, "loss": 0.4696, "step": 5853 }, { "epoch": 1.3058219942003122, "grad_norm": 0.1643034666776657, "learning_rate": 1.215172536942039e-05, "loss": 0.472, "step": 5854 }, { "epoch": 1.3060450591122017, "grad_norm": 0.1705443561077118, "learning_rate": 1.2149427007253372e-05, "loss": 0.4422, "step": 5855 }, { "epoch": 1.306268124024091, "grad_norm": 0.1692255437374115, "learning_rate": 1.2147128526037568e-05, "loss": 0.4583, "step": 5856 }, { "epoch": 1.3064911889359805, "grad_norm": 0.17892007529735565, "learning_rate": 1.2144829925900278e-05, "loss": 0.4834, "step": 5857 }, { "epoch": 1.3067142538478698, "grad_norm": 0.1718159317970276, "learning_rate": 1.2142531206968815e-05, "loss": 0.4576, "step": 5858 }, { "epoch": 1.306937318759759, "grad_norm": 0.16335871815681458, "learning_rate": 1.21402323693705e-05, "loss": 0.4709, "step": 5859 }, { "epoch": 1.3071603836716483, "grad_norm": 0.1720331460237503, "learning_rate": 1.2137933413232651e-05, "loss": 0.4536, "step": 5860 }, { "epoch": 1.3073834485835378, "grad_norm": 0.17654761672019958, "learning_rate": 1.2135634338682605e-05, "loss": 0.4706, "step": 5861 }, { "epoch": 1.3076065134954271, "grad_norm": 0.16412043571472168, "learning_rate": 1.2133335145847691e-05, "loss": 0.4914, "step": 5862 }, { "epoch": 1.3078295784073166, "grad_norm": 0.17055629193782806, "learning_rate": 1.213103583485526e-05, "loss": 0.4666, "step": 5863 }, { "epoch": 1.308052643319206, "grad_norm": 0.174026221036911, "learning_rate": 1.2128736405832657e-05, "loss": 0.46, "step": 5864 }, { "epoch": 1.3082757082310952, "grad_norm": 0.17043223977088928, "learning_rate": 1.2126436858907244e-05, "loss": 0.4658, "step": 5865 }, { "epoch": 1.3084987731429847, "grad_norm": 0.1644662320613861, "learning_rate": 1.212413719420638e-05, "loss": 0.4527, "step": 5866 }, { "epoch": 1.308721838054874, "grad_norm": 0.1671665757894516, "learning_rate": 1.212183741185744e-05, "loss": 0.4617, "step": 5867 }, { "epoch": 1.3089449029667632, "grad_norm": 0.2064938247203827, "learning_rate": 1.2119537511987794e-05, "loss": 0.4554, "step": 5868 }, { "epoch": 1.3091679678786527, "grad_norm": 0.1725645214319229, "learning_rate": 1.211723749472483e-05, "loss": 0.4534, "step": 5869 }, { "epoch": 1.309391032790542, "grad_norm": 0.16695371270179749, "learning_rate": 1.2114937360195935e-05, "loss": 0.4517, "step": 5870 }, { "epoch": 1.3096140977024313, "grad_norm": 0.18011373281478882, "learning_rate": 1.2112637108528505e-05, "loss": 0.4576, "step": 5871 }, { "epoch": 1.3098371626143208, "grad_norm": 0.17157688736915588, "learning_rate": 1.2110336739849944e-05, "loss": 0.438, "step": 5872 }, { "epoch": 1.31006022752621, "grad_norm": 0.17350567877292633, "learning_rate": 1.2108036254287658e-05, "loss": 0.4647, "step": 5873 }, { "epoch": 1.3102832924380996, "grad_norm": 0.16434237360954285, "learning_rate": 1.2105735651969066e-05, "loss": 0.4603, "step": 5874 }, { "epoch": 1.3105063573499889, "grad_norm": 0.16745516657829285, "learning_rate": 1.2103434933021587e-05, "loss": 0.4585, "step": 5875 }, { "epoch": 1.3107294222618782, "grad_norm": 0.16241510212421417, "learning_rate": 1.2101134097572654e-05, "loss": 0.4415, "step": 5876 }, { "epoch": 1.3109524871737674, "grad_norm": 0.16632577776908875, "learning_rate": 1.2098833145749691e-05, "loss": 0.4715, "step": 5877 }, { "epoch": 1.311175552085657, "grad_norm": 0.17647215723991394, "learning_rate": 1.2096532077680153e-05, "loss": 0.491, "step": 5878 }, { "epoch": 1.3113986169975462, "grad_norm": 0.1738002449274063, "learning_rate": 1.2094230893491475e-05, "loss": 0.4731, "step": 5879 }, { "epoch": 1.3116216819094357, "grad_norm": 0.16479554772377014, "learning_rate": 1.2091929593311122e-05, "loss": 0.4787, "step": 5880 }, { "epoch": 1.311844746821325, "grad_norm": 0.16516366600990295, "learning_rate": 1.2089628177266545e-05, "loss": 0.4551, "step": 5881 }, { "epoch": 1.3120678117332143, "grad_norm": 0.1664283722639084, "learning_rate": 1.2087326645485218e-05, "loss": 0.4516, "step": 5882 }, { "epoch": 1.3122908766451038, "grad_norm": 0.23761501908302307, "learning_rate": 1.208502499809461e-05, "loss": 0.4679, "step": 5883 }, { "epoch": 1.312513941556993, "grad_norm": 0.17012745141983032, "learning_rate": 1.2082723235222205e-05, "loss": 0.4618, "step": 5884 }, { "epoch": 1.3127370064688826, "grad_norm": 0.1753227859735489, "learning_rate": 1.2080421356995484e-05, "loss": 0.4671, "step": 5885 }, { "epoch": 1.3129600713807719, "grad_norm": 0.17254190146923065, "learning_rate": 1.2078119363541942e-05, "loss": 0.4379, "step": 5886 }, { "epoch": 1.3131831362926611, "grad_norm": 0.1624683141708374, "learning_rate": 1.2075817254989078e-05, "loss": 0.4361, "step": 5887 }, { "epoch": 1.3134062012045504, "grad_norm": 0.16935352981090546, "learning_rate": 1.2073515031464397e-05, "loss": 0.4528, "step": 5888 }, { "epoch": 1.31362926611644, "grad_norm": 0.1754865050315857, "learning_rate": 1.207121269309541e-05, "loss": 0.4503, "step": 5889 }, { "epoch": 1.3138523310283292, "grad_norm": 0.1585618406534195, "learning_rate": 1.2068910240009636e-05, "loss": 0.4401, "step": 5890 }, { "epoch": 1.3140753959402187, "grad_norm": 0.18017402291297913, "learning_rate": 1.20666076723346e-05, "loss": 0.4595, "step": 5891 }, { "epoch": 1.314298460852108, "grad_norm": 0.1608768105506897, "learning_rate": 1.206430499019783e-05, "loss": 0.4414, "step": 5892 }, { "epoch": 1.3145215257639973, "grad_norm": 0.1631273776292801, "learning_rate": 1.2062002193726867e-05, "loss": 0.4567, "step": 5893 }, { "epoch": 1.3147445906758866, "grad_norm": 0.16995559632778168, "learning_rate": 1.2059699283049249e-05, "loss": 0.4611, "step": 5894 }, { "epoch": 1.314967655587776, "grad_norm": 0.1610070914030075, "learning_rate": 1.2057396258292533e-05, "loss": 0.4271, "step": 5895 }, { "epoch": 1.3151907204996653, "grad_norm": 0.16440489888191223, "learning_rate": 1.2055093119584264e-05, "loss": 0.4531, "step": 5896 }, { "epoch": 1.3154137854115548, "grad_norm": 0.16929922997951508, "learning_rate": 1.2052789867052018e-05, "loss": 0.4426, "step": 5897 }, { "epoch": 1.3156368503234441, "grad_norm": 0.17286795377731323, "learning_rate": 1.2050486500823352e-05, "loss": 0.4584, "step": 5898 }, { "epoch": 1.3158599152353334, "grad_norm": 0.1717979460954666, "learning_rate": 1.2048183021025847e-05, "loss": 0.461, "step": 5899 }, { "epoch": 1.316082980147223, "grad_norm": 0.15612578392028809, "learning_rate": 1.2045879427787084e-05, "loss": 0.4466, "step": 5900 }, { "epoch": 1.3163060450591122, "grad_norm": 0.17022433876991272, "learning_rate": 1.2043575721234649e-05, "loss": 0.4376, "step": 5901 }, { "epoch": 1.3165291099710017, "grad_norm": 0.16973432898521423, "learning_rate": 1.2041271901496136e-05, "loss": 0.4606, "step": 5902 }, { "epoch": 1.316752174882891, "grad_norm": 0.1685478538274765, "learning_rate": 1.2038967968699143e-05, "loss": 0.4621, "step": 5903 }, { "epoch": 1.3169752397947803, "grad_norm": 0.16581124067306519, "learning_rate": 1.2036663922971279e-05, "loss": 0.4652, "step": 5904 }, { "epoch": 1.3171983047066695, "grad_norm": 0.17322222888469696, "learning_rate": 1.2034359764440156e-05, "loss": 0.5025, "step": 5905 }, { "epoch": 1.317421369618559, "grad_norm": 0.16943678259849548, "learning_rate": 1.2032055493233394e-05, "loss": 0.4508, "step": 5906 }, { "epoch": 1.3176444345304483, "grad_norm": 0.17897944152355194, "learning_rate": 1.2029751109478614e-05, "loss": 0.4623, "step": 5907 }, { "epoch": 1.3178674994423378, "grad_norm": 0.1740075647830963, "learning_rate": 1.2027446613303454e-05, "loss": 0.4835, "step": 5908 }, { "epoch": 1.3180905643542271, "grad_norm": 0.173087015748024, "learning_rate": 1.2025142004835541e-05, "loss": 0.4713, "step": 5909 }, { "epoch": 1.3183136292661164, "grad_norm": 0.18074947595596313, "learning_rate": 1.2022837284202531e-05, "loss": 0.4621, "step": 5910 }, { "epoch": 1.3185366941780057, "grad_norm": 0.16880744695663452, "learning_rate": 1.2020532451532063e-05, "loss": 0.4486, "step": 5911 }, { "epoch": 1.3187597590898952, "grad_norm": 0.16226506233215332, "learning_rate": 1.2018227506951802e-05, "loss": 0.4502, "step": 5912 }, { "epoch": 1.3189828240017845, "grad_norm": 0.16763652861118317, "learning_rate": 1.2015922450589405e-05, "loss": 0.4588, "step": 5913 }, { "epoch": 1.319205888913674, "grad_norm": 0.1670703887939453, "learning_rate": 1.2013617282572545e-05, "loss": 0.4398, "step": 5914 }, { "epoch": 1.3194289538255632, "grad_norm": 0.18015524744987488, "learning_rate": 1.201131200302889e-05, "loss": 0.4625, "step": 5915 }, { "epoch": 1.3196520187374525, "grad_norm": 0.16982007026672363, "learning_rate": 1.2009006612086128e-05, "loss": 0.4619, "step": 5916 }, { "epoch": 1.319875083649342, "grad_norm": 0.16795918345451355, "learning_rate": 1.2006701109871936e-05, "loss": 0.4577, "step": 5917 }, { "epoch": 1.3200981485612313, "grad_norm": 0.17438872158527374, "learning_rate": 1.2004395496514021e-05, "loss": 0.4668, "step": 5918 }, { "epoch": 1.3203212134731208, "grad_norm": 0.17492419481277466, "learning_rate": 1.2002089772140071e-05, "loss": 0.4592, "step": 5919 }, { "epoch": 1.32054427838501, "grad_norm": 0.17449866235256195, "learning_rate": 1.19997839368778e-05, "loss": 0.4632, "step": 5920 }, { "epoch": 1.3207673432968994, "grad_norm": 0.16483739018440247, "learning_rate": 1.199747799085491e-05, "loss": 0.4496, "step": 5921 }, { "epoch": 1.3209904082087887, "grad_norm": 0.19091255962848663, "learning_rate": 1.1995171934199128e-05, "loss": 0.4647, "step": 5922 }, { "epoch": 1.3212134731206782, "grad_norm": 0.17579780519008636, "learning_rate": 1.1992865767038172e-05, "loss": 0.4545, "step": 5923 }, { "epoch": 1.3214365380325674, "grad_norm": 0.17818951606750488, "learning_rate": 1.1990559489499776e-05, "loss": 0.4806, "step": 5924 }, { "epoch": 1.321659602944457, "grad_norm": 0.17060208320617676, "learning_rate": 1.1988253101711675e-05, "loss": 0.4435, "step": 5925 }, { "epoch": 1.3218826678563462, "grad_norm": 0.17483805119991302, "learning_rate": 1.1985946603801608e-05, "loss": 0.4901, "step": 5926 }, { "epoch": 1.3221057327682355, "grad_norm": 0.17470861971378326, "learning_rate": 1.198363999589733e-05, "loss": 0.462, "step": 5927 }, { "epoch": 1.3223287976801248, "grad_norm": 0.16998319327831268, "learning_rate": 1.1981333278126585e-05, "loss": 0.4469, "step": 5928 }, { "epoch": 1.3225518625920143, "grad_norm": 0.1669076532125473, "learning_rate": 1.1979026450617147e-05, "loss": 0.4422, "step": 5929 }, { "epoch": 1.3227749275039036, "grad_norm": 0.17433854937553406, "learning_rate": 1.197671951349677e-05, "loss": 0.4572, "step": 5930 }, { "epoch": 1.322997992415793, "grad_norm": 0.16878628730773926, "learning_rate": 1.1974412466893237e-05, "loss": 0.4354, "step": 5931 }, { "epoch": 1.3232210573276824, "grad_norm": 0.17208078503608704, "learning_rate": 1.1972105310934318e-05, "loss": 0.4149, "step": 5932 }, { "epoch": 1.3234441222395716, "grad_norm": 0.1662035584449768, "learning_rate": 1.1969798045747805e-05, "loss": 0.4224, "step": 5933 }, { "epoch": 1.3236671871514611, "grad_norm": 0.16633039712905884, "learning_rate": 1.1967490671461484e-05, "loss": 0.4621, "step": 5934 }, { "epoch": 1.3238902520633504, "grad_norm": 0.17780262231826782, "learning_rate": 1.1965183188203154e-05, "loss": 0.4969, "step": 5935 }, { "epoch": 1.32411331697524, "grad_norm": 0.16385456919670105, "learning_rate": 1.1962875596100616e-05, "loss": 0.4476, "step": 5936 }, { "epoch": 1.3243363818871292, "grad_norm": 0.17453844845294952, "learning_rate": 1.1960567895281682e-05, "loss": 0.4784, "step": 5937 }, { "epoch": 1.3245594467990185, "grad_norm": 0.17324872314929962, "learning_rate": 1.1958260085874165e-05, "loss": 0.4734, "step": 5938 }, { "epoch": 1.3247825117109078, "grad_norm": 0.1686837375164032, "learning_rate": 1.1955952168005889e-05, "loss": 0.4497, "step": 5939 }, { "epoch": 1.3250055766227973, "grad_norm": 0.17261843383312225, "learning_rate": 1.1953644141804675e-05, "loss": 0.4805, "step": 5940 }, { "epoch": 1.3252286415346866, "grad_norm": 0.16724753379821777, "learning_rate": 1.1951336007398362e-05, "loss": 0.4568, "step": 5941 }, { "epoch": 1.325451706446576, "grad_norm": 0.17275574803352356, "learning_rate": 1.1949027764914786e-05, "loss": 0.4567, "step": 5942 }, { "epoch": 1.3256747713584653, "grad_norm": 0.16790451109409332, "learning_rate": 1.194671941448179e-05, "loss": 0.4821, "step": 5943 }, { "epoch": 1.3258978362703546, "grad_norm": 0.16740332543849945, "learning_rate": 1.1944410956227233e-05, "loss": 0.4712, "step": 5944 }, { "epoch": 1.326120901182244, "grad_norm": 0.185842826962471, "learning_rate": 1.1942102390278961e-05, "loss": 0.4639, "step": 5945 }, { "epoch": 1.3263439660941334, "grad_norm": 0.1872578263282776, "learning_rate": 1.1939793716764845e-05, "loss": 0.4601, "step": 5946 }, { "epoch": 1.3265670310060227, "grad_norm": 0.17680978775024414, "learning_rate": 1.1937484935812749e-05, "loss": 0.4536, "step": 5947 }, { "epoch": 1.3267900959179122, "grad_norm": 0.1672886312007904, "learning_rate": 1.1935176047550552e-05, "loss": 0.4573, "step": 5948 }, { "epoch": 1.3270131608298015, "grad_norm": 0.16590574383735657, "learning_rate": 1.1932867052106132e-05, "loss": 0.4702, "step": 5949 }, { "epoch": 1.3272362257416908, "grad_norm": 0.15358559787273407, "learning_rate": 1.1930557949607378e-05, "loss": 0.4202, "step": 5950 }, { "epoch": 1.3274592906535803, "grad_norm": 0.16293473541736603, "learning_rate": 1.1928248740182177e-05, "loss": 0.4528, "step": 5951 }, { "epoch": 1.3276823555654695, "grad_norm": 0.16601158678531647, "learning_rate": 1.1925939423958437e-05, "loss": 0.4641, "step": 5952 }, { "epoch": 1.327905420477359, "grad_norm": 0.16637404263019562, "learning_rate": 1.1923630001064052e-05, "loss": 0.4606, "step": 5953 }, { "epoch": 1.3281284853892483, "grad_norm": 0.16997739672660828, "learning_rate": 1.1921320471626939e-05, "loss": 0.4688, "step": 5954 }, { "epoch": 1.3283515503011376, "grad_norm": 0.1634935885667801, "learning_rate": 1.1919010835775015e-05, "loss": 0.4367, "step": 5955 }, { "epoch": 1.3285746152130269, "grad_norm": 0.16118724644184113, "learning_rate": 1.1916701093636196e-05, "loss": 0.4367, "step": 5956 }, { "epoch": 1.3287976801249164, "grad_norm": 0.16823935508728027, "learning_rate": 1.1914391245338417e-05, "loss": 0.4566, "step": 5957 }, { "epoch": 1.3290207450368057, "grad_norm": 0.16638781130313873, "learning_rate": 1.1912081291009608e-05, "loss": 0.4533, "step": 5958 }, { "epoch": 1.3292438099486952, "grad_norm": 0.20548874139785767, "learning_rate": 1.1909771230777709e-05, "loss": 0.4482, "step": 5959 }, { "epoch": 1.3294668748605845, "grad_norm": 0.16751278936862946, "learning_rate": 1.1907461064770667e-05, "loss": 0.4552, "step": 5960 }, { "epoch": 1.3296899397724737, "grad_norm": 0.17895007133483887, "learning_rate": 1.1905150793116433e-05, "loss": 0.4638, "step": 5961 }, { "epoch": 1.329913004684363, "grad_norm": 0.17109131813049316, "learning_rate": 1.1902840415942964e-05, "loss": 0.4386, "step": 5962 }, { "epoch": 1.3301360695962525, "grad_norm": 0.1729149967432022, "learning_rate": 1.1900529933378224e-05, "loss": 0.4626, "step": 5963 }, { "epoch": 1.3303591345081418, "grad_norm": 0.16538968682289124, "learning_rate": 1.189821934555018e-05, "loss": 0.4514, "step": 5964 }, { "epoch": 1.3305821994200313, "grad_norm": 0.1783122420310974, "learning_rate": 1.189590865258681e-05, "loss": 0.4652, "step": 5965 }, { "epoch": 1.3308052643319206, "grad_norm": 0.15844860672950745, "learning_rate": 1.1893597854616092e-05, "loss": 0.4402, "step": 5966 }, { "epoch": 1.3310283292438099, "grad_norm": 0.1638982594013214, "learning_rate": 1.1891286951766014e-05, "loss": 0.4608, "step": 5967 }, { "epoch": 1.3312513941556994, "grad_norm": 0.1644224226474762, "learning_rate": 1.1888975944164567e-05, "loss": 0.4526, "step": 5968 }, { "epoch": 1.3314744590675887, "grad_norm": 0.20200717449188232, "learning_rate": 1.1886664831939751e-05, "loss": 0.4611, "step": 5969 }, { "epoch": 1.3316975239794782, "grad_norm": 0.1918911337852478, "learning_rate": 1.188435361521957e-05, "loss": 0.4704, "step": 5970 }, { "epoch": 1.3319205888913674, "grad_norm": 0.16691647469997406, "learning_rate": 1.1882042294132032e-05, "loss": 0.4787, "step": 5971 }, { "epoch": 1.3321436538032567, "grad_norm": 0.16211196780204773, "learning_rate": 1.1879730868805153e-05, "loss": 0.4535, "step": 5972 }, { "epoch": 1.332366718715146, "grad_norm": 0.16488417983055115, "learning_rate": 1.1877419339366953e-05, "loss": 0.4461, "step": 5973 }, { "epoch": 1.3325897836270355, "grad_norm": 0.16110685467720032, "learning_rate": 1.1875107705945461e-05, "loss": 0.444, "step": 5974 }, { "epoch": 1.3328128485389248, "grad_norm": 0.16213123500347137, "learning_rate": 1.187279596866871e-05, "loss": 0.4324, "step": 5975 }, { "epoch": 1.3330359134508143, "grad_norm": 0.17590108513832092, "learning_rate": 1.1870484127664739e-05, "loss": 0.4416, "step": 5976 }, { "epoch": 1.3332589783627036, "grad_norm": 0.20271827280521393, "learning_rate": 1.186817218306159e-05, "loss": 0.4514, "step": 5977 }, { "epoch": 1.3334820432745929, "grad_norm": 0.17544665932655334, "learning_rate": 1.1865860134987317e-05, "loss": 0.4788, "step": 5978 }, { "epoch": 1.3337051081864821, "grad_norm": 0.2024955302476883, "learning_rate": 1.1863547983569967e-05, "loss": 0.4804, "step": 5979 }, { "epoch": 1.3339281730983716, "grad_norm": 0.17373384535312653, "learning_rate": 1.1861235728937613e-05, "loss": 0.4485, "step": 5980 }, { "epoch": 1.334151238010261, "grad_norm": 0.17016762495040894, "learning_rate": 1.1858923371218314e-05, "loss": 0.4399, "step": 5981 }, { "epoch": 1.3343743029221504, "grad_norm": 0.167991504073143, "learning_rate": 1.1856610910540149e-05, "loss": 0.4612, "step": 5982 }, { "epoch": 1.3345973678340397, "grad_norm": 0.1790834218263626, "learning_rate": 1.185429834703119e-05, "loss": 0.456, "step": 5983 }, { "epoch": 1.334820432745929, "grad_norm": 0.16934290528297424, "learning_rate": 1.1851985680819526e-05, "loss": 0.4717, "step": 5984 }, { "epoch": 1.3350434976578185, "grad_norm": 0.1672845035791397, "learning_rate": 1.1849672912033245e-05, "loss": 0.4748, "step": 5985 }, { "epoch": 1.3352665625697078, "grad_norm": 0.16489911079406738, "learning_rate": 1.1847360040800442e-05, "loss": 0.416, "step": 5986 }, { "epoch": 1.3354896274815973, "grad_norm": 0.18953464925289154, "learning_rate": 1.184504706724922e-05, "loss": 0.4601, "step": 5987 }, { "epoch": 1.3357126923934866, "grad_norm": 0.16453106701374054, "learning_rate": 1.1842733991507687e-05, "loss": 0.4417, "step": 5988 }, { "epoch": 1.3359357573053758, "grad_norm": 0.1774827390909195, "learning_rate": 1.1840420813703955e-05, "loss": 0.4459, "step": 5989 }, { "epoch": 1.3361588222172651, "grad_norm": 0.1909363716840744, "learning_rate": 1.183810753396614e-05, "loss": 0.4695, "step": 5990 }, { "epoch": 1.3363818871291546, "grad_norm": 0.18591512739658356, "learning_rate": 1.1835794152422366e-05, "loss": 0.4579, "step": 5991 }, { "epoch": 1.336604952041044, "grad_norm": 0.1766308695077896, "learning_rate": 1.1833480669200765e-05, "loss": 0.4595, "step": 5992 }, { "epoch": 1.3368280169529334, "grad_norm": 0.16689802706241608, "learning_rate": 1.1831167084429474e-05, "loss": 0.4371, "step": 5993 }, { "epoch": 1.3370510818648227, "grad_norm": 0.180791437625885, "learning_rate": 1.1828853398236629e-05, "loss": 0.4773, "step": 5994 }, { "epoch": 1.337274146776712, "grad_norm": 0.18029746413230896, "learning_rate": 1.182653961075038e-05, "loss": 0.4281, "step": 5995 }, { "epoch": 1.3374972116886013, "grad_norm": 0.16131596267223358, "learning_rate": 1.1824225722098877e-05, "loss": 0.4479, "step": 5996 }, { "epoch": 1.3377202766004908, "grad_norm": 0.16809871792793274, "learning_rate": 1.182191173241028e-05, "loss": 0.4473, "step": 5997 }, { "epoch": 1.33794334151238, "grad_norm": 0.17205321788787842, "learning_rate": 1.181959764181275e-05, "loss": 0.4357, "step": 5998 }, { "epoch": 1.3381664064242695, "grad_norm": 0.17919965088367462, "learning_rate": 1.1817283450434459e-05, "loss": 0.4892, "step": 5999 }, { "epoch": 1.3383894713361588, "grad_norm": 0.16656439006328583, "learning_rate": 1.1814969158403575e-05, "loss": 0.4686, "step": 6000 }, { "epoch": 1.338612536248048, "grad_norm": 0.16457730531692505, "learning_rate": 1.1812654765848285e-05, "loss": 0.4346, "step": 6001 }, { "epoch": 1.3388356011599376, "grad_norm": 0.17078131437301636, "learning_rate": 1.1810340272896772e-05, "loss": 0.4567, "step": 6002 }, { "epoch": 1.3390586660718269, "grad_norm": 0.16368982195854187, "learning_rate": 1.1808025679677229e-05, "loss": 0.451, "step": 6003 }, { "epoch": 1.3392817309837164, "grad_norm": 0.16397084295749664, "learning_rate": 1.1805710986317846e-05, "loss": 0.4541, "step": 6004 }, { "epoch": 1.3395047958956057, "grad_norm": 0.16752174496650696, "learning_rate": 1.1803396192946835e-05, "loss": 0.4714, "step": 6005 }, { "epoch": 1.339727860807495, "grad_norm": 0.17410169541835785, "learning_rate": 1.1801081299692396e-05, "loss": 0.4782, "step": 6006 }, { "epoch": 1.3399509257193842, "grad_norm": 0.16793891787528992, "learning_rate": 1.1798766306682746e-05, "loss": 0.4733, "step": 6007 }, { "epoch": 1.3401739906312737, "grad_norm": 0.17633196711540222, "learning_rate": 1.1796451214046106e-05, "loss": 0.4668, "step": 6008 }, { "epoch": 1.340397055543163, "grad_norm": 0.17554394900798798, "learning_rate": 1.1794136021910694e-05, "loss": 0.4375, "step": 6009 }, { "epoch": 1.3406201204550525, "grad_norm": 0.1725650429725647, "learning_rate": 1.1791820730404746e-05, "loss": 0.46, "step": 6010 }, { "epoch": 1.3408431853669418, "grad_norm": 0.17213909327983856, "learning_rate": 1.1789505339656493e-05, "loss": 0.463, "step": 6011 }, { "epoch": 1.341066250278831, "grad_norm": 0.17620781064033508, "learning_rate": 1.1787189849794178e-05, "loss": 0.4817, "step": 6012 }, { "epoch": 1.3412893151907204, "grad_norm": 0.1637295037508011, "learning_rate": 1.1784874260946048e-05, "loss": 0.4883, "step": 6013 }, { "epoch": 1.3415123801026099, "grad_norm": 0.17048433423042297, "learning_rate": 1.1782558573240355e-05, "loss": 0.4333, "step": 6014 }, { "epoch": 1.3417354450144992, "grad_norm": 0.17557942867279053, "learning_rate": 1.1780242786805353e-05, "loss": 0.4704, "step": 6015 }, { "epoch": 1.3419585099263887, "grad_norm": 0.17155835032463074, "learning_rate": 1.177792690176931e-05, "loss": 0.4627, "step": 6016 }, { "epoch": 1.342181574838278, "grad_norm": 0.1626436561346054, "learning_rate": 1.177561091826049e-05, "loss": 0.4703, "step": 6017 }, { "epoch": 1.3424046397501672, "grad_norm": 0.18032924830913544, "learning_rate": 1.177329483640717e-05, "loss": 0.4727, "step": 6018 }, { "epoch": 1.3426277046620567, "grad_norm": 0.18225564062595367, "learning_rate": 1.1770978656337626e-05, "loss": 0.4737, "step": 6019 }, { "epoch": 1.342850769573946, "grad_norm": 0.22427327930927277, "learning_rate": 1.1768662378180144e-05, "loss": 0.4353, "step": 6020 }, { "epoch": 1.3430738344858355, "grad_norm": 0.16713419556617737, "learning_rate": 1.1766346002063017e-05, "loss": 0.4674, "step": 6021 }, { "epoch": 1.3432968993977248, "grad_norm": 0.17510871589183807, "learning_rate": 1.1764029528114533e-05, "loss": 0.4907, "step": 6022 }, { "epoch": 1.343519964309614, "grad_norm": 0.20499172806739807, "learning_rate": 1.1761712956463003e-05, "loss": 0.4798, "step": 6023 }, { "epoch": 1.3437430292215033, "grad_norm": 0.17217415571212769, "learning_rate": 1.1759396287236721e-05, "loss": 0.4642, "step": 6024 }, { "epoch": 1.3439660941333929, "grad_norm": 0.17448726296424866, "learning_rate": 1.1757079520564012e-05, "loss": 0.4569, "step": 6025 }, { "epoch": 1.3441891590452821, "grad_norm": 0.16537372767925262, "learning_rate": 1.1754762656573182e-05, "loss": 0.4348, "step": 6026 }, { "epoch": 1.3444122239571716, "grad_norm": 0.16384288668632507, "learning_rate": 1.1752445695392563e-05, "loss": 0.444, "step": 6027 }, { "epoch": 1.344635288869061, "grad_norm": 0.25420817732810974, "learning_rate": 1.1750128637150473e-05, "loss": 0.4489, "step": 6028 }, { "epoch": 1.3448583537809502, "grad_norm": 0.17444314062595367, "learning_rate": 1.1747811481975253e-05, "loss": 0.4641, "step": 6029 }, { "epoch": 1.3450814186928395, "grad_norm": 0.17278996109962463, "learning_rate": 1.1745494229995237e-05, "loss": 0.4622, "step": 6030 }, { "epoch": 1.345304483604729, "grad_norm": 0.1730216145515442, "learning_rate": 1.1743176881338773e-05, "loss": 0.4413, "step": 6031 }, { "epoch": 1.3455275485166183, "grad_norm": 0.1769610494375229, "learning_rate": 1.174085943613421e-05, "loss": 0.4739, "step": 6032 }, { "epoch": 1.3457506134285078, "grad_norm": 0.17104336619377136, "learning_rate": 1.1738541894509898e-05, "loss": 0.4513, "step": 6033 }, { "epoch": 1.345973678340397, "grad_norm": 0.1742696762084961, "learning_rate": 1.17362242565942e-05, "loss": 0.4549, "step": 6034 }, { "epoch": 1.3461967432522863, "grad_norm": 0.16949966549873352, "learning_rate": 1.1733906522515483e-05, "loss": 0.4773, "step": 6035 }, { "epoch": 1.3464198081641758, "grad_norm": 0.17838764190673828, "learning_rate": 1.1731588692402114e-05, "loss": 0.47, "step": 6036 }, { "epoch": 1.3466428730760651, "grad_norm": 0.1655292510986328, "learning_rate": 1.1729270766382474e-05, "loss": 0.4577, "step": 6037 }, { "epoch": 1.3468659379879546, "grad_norm": 0.1620989590883255, "learning_rate": 1.172695274458494e-05, "loss": 0.427, "step": 6038 }, { "epoch": 1.347089002899844, "grad_norm": 0.15767668187618256, "learning_rate": 1.1724634627137896e-05, "loss": 0.4419, "step": 6039 }, { "epoch": 1.3473120678117332, "grad_norm": 0.17484217882156372, "learning_rate": 1.1722316414169743e-05, "loss": 0.4221, "step": 6040 }, { "epoch": 1.3475351327236225, "grad_norm": 0.16977369785308838, "learning_rate": 1.1719998105808871e-05, "loss": 0.4639, "step": 6041 }, { "epoch": 1.347758197635512, "grad_norm": 0.16105391085147858, "learning_rate": 1.1717679702183687e-05, "loss": 0.4707, "step": 6042 }, { "epoch": 1.3479812625474012, "grad_norm": 0.16509099304676056, "learning_rate": 1.1715361203422595e-05, "loss": 0.4378, "step": 6043 }, { "epoch": 1.3482043274592908, "grad_norm": 0.18078747391700745, "learning_rate": 1.1713042609654008e-05, "loss": 0.4565, "step": 6044 }, { "epoch": 1.34842739237118, "grad_norm": 0.17105768620967865, "learning_rate": 1.1710723921006348e-05, "loss": 0.4574, "step": 6045 }, { "epoch": 1.3486504572830693, "grad_norm": 0.17831863462924957, "learning_rate": 1.1708405137608036e-05, "loss": 0.4789, "step": 6046 }, { "epoch": 1.3488735221949586, "grad_norm": 0.17648382484912872, "learning_rate": 1.1706086259587499e-05, "loss": 0.4757, "step": 6047 }, { "epoch": 1.349096587106848, "grad_norm": 0.1652226746082306, "learning_rate": 1.1703767287073177e-05, "loss": 0.4559, "step": 6048 }, { "epoch": 1.3493196520187374, "grad_norm": 0.16818653047084808, "learning_rate": 1.1701448220193503e-05, "loss": 0.4739, "step": 6049 }, { "epoch": 1.3495427169306269, "grad_norm": 0.17042328417301178, "learning_rate": 1.1699129059076925e-05, "loss": 0.4512, "step": 6050 }, { "epoch": 1.3497657818425162, "grad_norm": 0.16678601503372192, "learning_rate": 1.1696809803851891e-05, "loss": 0.4454, "step": 6051 }, { "epoch": 1.3499888467544054, "grad_norm": 0.1808800995349884, "learning_rate": 1.1694490454646858e-05, "loss": 0.4656, "step": 6052 }, { "epoch": 1.350211911666295, "grad_norm": 0.17242035269737244, "learning_rate": 1.1692171011590282e-05, "loss": 0.4635, "step": 6053 }, { "epoch": 1.3504349765781842, "grad_norm": 0.16750234365463257, "learning_rate": 1.1689851474810636e-05, "loss": 0.4627, "step": 6054 }, { "epoch": 1.3506580414900737, "grad_norm": 0.1674799621105194, "learning_rate": 1.1687531844436382e-05, "loss": 0.4452, "step": 6055 }, { "epoch": 1.350881106401963, "grad_norm": 0.17459098994731903, "learning_rate": 1.1685212120596001e-05, "loss": 0.467, "step": 6056 }, { "epoch": 1.3511041713138523, "grad_norm": 0.17721140384674072, "learning_rate": 1.168289230341797e-05, "loss": 0.4788, "step": 6057 }, { "epoch": 1.3513272362257416, "grad_norm": 0.167550191283226, "learning_rate": 1.168057239303078e-05, "loss": 0.4488, "step": 6058 }, { "epoch": 1.351550301137631, "grad_norm": 0.17352843284606934, "learning_rate": 1.1678252389562919e-05, "loss": 0.4582, "step": 6059 }, { "epoch": 1.3517733660495204, "grad_norm": 0.1822769194841385, "learning_rate": 1.1675932293142882e-05, "loss": 0.4597, "step": 6060 }, { "epoch": 1.3519964309614099, "grad_norm": 0.16711196303367615, "learning_rate": 1.1673612103899176e-05, "loss": 0.4624, "step": 6061 }, { "epoch": 1.3522194958732991, "grad_norm": 0.16815169155597687, "learning_rate": 1.16712918219603e-05, "loss": 0.4416, "step": 6062 }, { "epoch": 1.3524425607851884, "grad_norm": 0.17060011625289917, "learning_rate": 1.1668971447454775e-05, "loss": 0.4472, "step": 6063 }, { "epoch": 1.3526656256970777, "grad_norm": 0.1819947212934494, "learning_rate": 1.1666650980511112e-05, "loss": 0.4791, "step": 6064 }, { "epoch": 1.3528886906089672, "grad_norm": 0.16703422367572784, "learning_rate": 1.1664330421257835e-05, "loss": 0.4596, "step": 6065 }, { "epoch": 1.3531117555208565, "grad_norm": 0.16398312151432037, "learning_rate": 1.1662009769823466e-05, "loss": 0.46, "step": 6066 }, { "epoch": 1.353334820432746, "grad_norm": 0.177035391330719, "learning_rate": 1.1659689026336544e-05, "loss": 0.4566, "step": 6067 }, { "epoch": 1.3535578853446353, "grad_norm": 0.17809878289699554, "learning_rate": 1.1657368190925602e-05, "loss": 0.4687, "step": 6068 }, { "epoch": 1.3537809502565246, "grad_norm": 0.17178352177143097, "learning_rate": 1.1655047263719188e-05, "loss": 0.4609, "step": 6069 }, { "epoch": 1.354004015168414, "grad_norm": 0.16717985272407532, "learning_rate": 1.1652726244845843e-05, "loss": 0.4341, "step": 6070 }, { "epoch": 1.3542270800803033, "grad_norm": 0.1680830717086792, "learning_rate": 1.1650405134434122e-05, "loss": 0.4286, "step": 6071 }, { "epoch": 1.3544501449921929, "grad_norm": 0.1840554177761078, "learning_rate": 1.1648083932612584e-05, "loss": 0.4781, "step": 6072 }, { "epoch": 1.3546732099040821, "grad_norm": 0.16866901516914368, "learning_rate": 1.164576263950979e-05, "loss": 0.4588, "step": 6073 }, { "epoch": 1.3548962748159714, "grad_norm": 0.18350256979465485, "learning_rate": 1.164344125525431e-05, "loss": 0.4828, "step": 6074 }, { "epoch": 1.3551193397278607, "grad_norm": 0.18490628898143768, "learning_rate": 1.1641119779974717e-05, "loss": 0.4127, "step": 6075 }, { "epoch": 1.3553424046397502, "grad_norm": 0.1628909409046173, "learning_rate": 1.1638798213799585e-05, "loss": 0.4403, "step": 6076 }, { "epoch": 1.3555654695516395, "grad_norm": 0.17124617099761963, "learning_rate": 1.1636476556857502e-05, "loss": 0.4638, "step": 6077 }, { "epoch": 1.355788534463529, "grad_norm": 0.18702422082424164, "learning_rate": 1.1634154809277052e-05, "loss": 0.4675, "step": 6078 }, { "epoch": 1.3560115993754183, "grad_norm": 0.174610435962677, "learning_rate": 1.1631832971186827e-05, "loss": 0.448, "step": 6079 }, { "epoch": 1.3562346642873075, "grad_norm": 0.165745347738266, "learning_rate": 1.162951104271543e-05, "loss": 0.4722, "step": 6080 }, { "epoch": 1.356457729199197, "grad_norm": 0.1741677075624466, "learning_rate": 1.162718902399146e-05, "loss": 0.4478, "step": 6081 }, { "epoch": 1.3566807941110863, "grad_norm": 0.17158982157707214, "learning_rate": 1.162486691514353e-05, "loss": 0.4594, "step": 6082 }, { "epoch": 1.3569038590229756, "grad_norm": 0.16993384063243866, "learning_rate": 1.1622544716300245e-05, "loss": 0.4304, "step": 6083 }, { "epoch": 1.3571269239348651, "grad_norm": 0.1746159791946411, "learning_rate": 1.1620222427590232e-05, "loss": 0.4395, "step": 6084 }, { "epoch": 1.3573499888467544, "grad_norm": 0.18195772171020508, "learning_rate": 1.1617900049142105e-05, "loss": 0.449, "step": 6085 }, { "epoch": 1.3575730537586437, "grad_norm": 0.19646844267845154, "learning_rate": 1.16155775810845e-05, "loss": 0.4391, "step": 6086 }, { "epoch": 1.3577961186705332, "grad_norm": 0.17097879946231842, "learning_rate": 1.1613255023546043e-05, "loss": 0.4744, "step": 6087 }, { "epoch": 1.3580191835824225, "grad_norm": 0.18478180468082428, "learning_rate": 1.1610932376655377e-05, "loss": 0.5241, "step": 6088 }, { "epoch": 1.358242248494312, "grad_norm": 0.16517724096775055, "learning_rate": 1.1608609640541142e-05, "loss": 0.465, "step": 6089 }, { "epoch": 1.3584653134062012, "grad_norm": 0.17419113218784332, "learning_rate": 1.1606286815331988e-05, "loss": 0.4501, "step": 6090 }, { "epoch": 1.3586883783180905, "grad_norm": 0.18028919398784637, "learning_rate": 1.1603963901156563e-05, "loss": 0.5103, "step": 6091 }, { "epoch": 1.3589114432299798, "grad_norm": 0.17037071287631989, "learning_rate": 1.1601640898143529e-05, "loss": 0.48, "step": 6092 }, { "epoch": 1.3591345081418693, "grad_norm": 0.1665482372045517, "learning_rate": 1.1599317806421548e-05, "loss": 0.4826, "step": 6093 }, { "epoch": 1.3593575730537586, "grad_norm": 0.18023498356342316, "learning_rate": 1.1596994626119287e-05, "loss": 0.4615, "step": 6094 }, { "epoch": 1.359580637965648, "grad_norm": 0.16920949518680573, "learning_rate": 1.159467135736542e-05, "loss": 0.4777, "step": 6095 }, { "epoch": 1.3598037028775374, "grad_norm": 0.17280833423137665, "learning_rate": 1.1592348000288618e-05, "loss": 0.4691, "step": 6096 }, { "epoch": 1.3600267677894267, "grad_norm": 0.17000465095043182, "learning_rate": 1.1590024555017571e-05, "loss": 0.44, "step": 6097 }, { "epoch": 1.3602498327013162, "grad_norm": 0.1656549870967865, "learning_rate": 1.1587701021680959e-05, "loss": 0.4266, "step": 6098 }, { "epoch": 1.3604728976132054, "grad_norm": 0.1633346974849701, "learning_rate": 1.1585377400407483e-05, "loss": 0.3994, "step": 6099 }, { "epoch": 1.3606959625250947, "grad_norm": 0.1661108434200287, "learning_rate": 1.1583053691325829e-05, "loss": 0.4668, "step": 6100 }, { "epoch": 1.3609190274369842, "grad_norm": 0.17027215659618378, "learning_rate": 1.1580729894564706e-05, "loss": 0.4536, "step": 6101 }, { "epoch": 1.3611420923488735, "grad_norm": 0.17399293184280396, "learning_rate": 1.1578406010252818e-05, "loss": 0.4718, "step": 6102 }, { "epoch": 1.3613651572607628, "grad_norm": 0.16790112853050232, "learning_rate": 1.1576082038518876e-05, "loss": 0.4487, "step": 6103 }, { "epoch": 1.3615882221726523, "grad_norm": 0.16512347757816315, "learning_rate": 1.15737579794916e-05, "loss": 0.4497, "step": 6104 }, { "epoch": 1.3618112870845416, "grad_norm": 0.16861163079738617, "learning_rate": 1.1571433833299703e-05, "loss": 0.4552, "step": 6105 }, { "epoch": 1.362034351996431, "grad_norm": 0.16993777453899384, "learning_rate": 1.156910960007192e-05, "loss": 0.4651, "step": 6106 }, { "epoch": 1.3622574169083204, "grad_norm": 0.16158044338226318, "learning_rate": 1.1566785279936972e-05, "loss": 0.4361, "step": 6107 }, { "epoch": 1.3624804818202096, "grad_norm": 0.17330992221832275, "learning_rate": 1.1564460873023604e-05, "loss": 0.4651, "step": 6108 }, { "epoch": 1.362703546732099, "grad_norm": 0.16832031309604645, "learning_rate": 1.156213637946055e-05, "loss": 0.4802, "step": 6109 }, { "epoch": 1.3629266116439884, "grad_norm": 0.17535291612148285, "learning_rate": 1.1559811799376557e-05, "loss": 0.4598, "step": 6110 }, { "epoch": 1.3631496765558777, "grad_norm": 0.17599309980869293, "learning_rate": 1.1557487132900376e-05, "loss": 0.4568, "step": 6111 }, { "epoch": 1.3633727414677672, "grad_norm": 0.18158310651779175, "learning_rate": 1.155516238016076e-05, "loss": 0.4597, "step": 6112 }, { "epoch": 1.3635958063796565, "grad_norm": 0.1722884625196457, "learning_rate": 1.1552837541286468e-05, "loss": 0.4703, "step": 6113 }, { "epoch": 1.3638188712915458, "grad_norm": 0.16803938150405884, "learning_rate": 1.1550512616406269e-05, "loss": 0.4524, "step": 6114 }, { "epoch": 1.3640419362034353, "grad_norm": 0.16398029029369354, "learning_rate": 1.1548187605648923e-05, "loss": 0.476, "step": 6115 }, { "epoch": 1.3642650011153246, "grad_norm": 0.17081895470619202, "learning_rate": 1.1545862509143212e-05, "loss": 0.4794, "step": 6116 }, { "epoch": 1.3644880660272138, "grad_norm": 0.16784706711769104, "learning_rate": 1.1543537327017911e-05, "loss": 0.4522, "step": 6117 }, { "epoch": 1.3647111309391033, "grad_norm": 0.1711539328098297, "learning_rate": 1.1541212059401806e-05, "loss": 0.4607, "step": 6118 }, { "epoch": 1.3649341958509926, "grad_norm": 0.17196914553642273, "learning_rate": 1.1538886706423678e-05, "loss": 0.4689, "step": 6119 }, { "epoch": 1.365157260762882, "grad_norm": 0.17912612855434418, "learning_rate": 1.1536561268212328e-05, "loss": 0.4616, "step": 6120 }, { "epoch": 1.3653803256747714, "grad_norm": 0.16970542073249817, "learning_rate": 1.1534235744896547e-05, "loss": 0.4481, "step": 6121 }, { "epoch": 1.3656033905866607, "grad_norm": 0.17101642489433289, "learning_rate": 1.153191013660514e-05, "loss": 0.4507, "step": 6122 }, { "epoch": 1.3658264554985502, "grad_norm": 0.16778020560741425, "learning_rate": 1.1529584443466915e-05, "loss": 0.4473, "step": 6123 }, { "epoch": 1.3660495204104395, "grad_norm": 0.16892312467098236, "learning_rate": 1.1527258665610681e-05, "loss": 0.4837, "step": 6124 }, { "epoch": 1.3662725853223288, "grad_norm": 0.1817074865102768, "learning_rate": 1.1524932803165254e-05, "loss": 0.4611, "step": 6125 }, { "epoch": 1.366495650234218, "grad_norm": 0.1750434935092926, "learning_rate": 1.1522606856259457e-05, "loss": 0.4877, "step": 6126 }, { "epoch": 1.3667187151461075, "grad_norm": 0.16866067051887512, "learning_rate": 1.1520280825022116e-05, "loss": 0.4593, "step": 6127 }, { "epoch": 1.3669417800579968, "grad_norm": 0.16479606926441193, "learning_rate": 1.1517954709582058e-05, "loss": 0.4685, "step": 6128 }, { "epoch": 1.3671648449698863, "grad_norm": 0.16955013573169708, "learning_rate": 1.1515628510068122e-05, "loss": 0.4905, "step": 6129 }, { "epoch": 1.3673879098817756, "grad_norm": 0.1888909786939621, "learning_rate": 1.1513302226609144e-05, "loss": 0.4382, "step": 6130 }, { "epoch": 1.367610974793665, "grad_norm": 0.179367333650589, "learning_rate": 1.151097585933397e-05, "loss": 0.4522, "step": 6131 }, { "epoch": 1.3678340397055544, "grad_norm": 0.16989760100841522, "learning_rate": 1.1508649408371448e-05, "loss": 0.4614, "step": 6132 }, { "epoch": 1.3680571046174437, "grad_norm": 0.17462711036205292, "learning_rate": 1.1506322873850434e-05, "loss": 0.4512, "step": 6133 }, { "epoch": 1.368280169529333, "grad_norm": 0.18676964938640594, "learning_rate": 1.1503996255899783e-05, "loss": 0.4778, "step": 6134 }, { "epoch": 1.3685032344412225, "grad_norm": 0.1641889065504074, "learning_rate": 1.1501669554648359e-05, "loss": 0.4562, "step": 6135 }, { "epoch": 1.3687262993531117, "grad_norm": 0.1635216772556305, "learning_rate": 1.1499342770225028e-05, "loss": 0.456, "step": 6136 }, { "epoch": 1.368949364265001, "grad_norm": 0.20571352541446686, "learning_rate": 1.1497015902758663e-05, "loss": 0.4625, "step": 6137 }, { "epoch": 1.3691724291768905, "grad_norm": 0.1723473221063614, "learning_rate": 1.1494688952378141e-05, "loss": 0.4592, "step": 6138 }, { "epoch": 1.3693954940887798, "grad_norm": 0.167448028922081, "learning_rate": 1.1492361919212345e-05, "loss": 0.454, "step": 6139 }, { "epoch": 1.3696185590006693, "grad_norm": 0.16840708255767822, "learning_rate": 1.1490034803390157e-05, "loss": 0.4703, "step": 6140 }, { "epoch": 1.3698416239125586, "grad_norm": 0.17156104743480682, "learning_rate": 1.148770760504047e-05, "loss": 0.4603, "step": 6141 }, { "epoch": 1.3700646888244479, "grad_norm": 0.16438238322734833, "learning_rate": 1.1485380324292175e-05, "loss": 0.4521, "step": 6142 }, { "epoch": 1.3702877537363372, "grad_norm": 0.17488059401512146, "learning_rate": 1.1483052961274177e-05, "loss": 0.4845, "step": 6143 }, { "epoch": 1.3705108186482267, "grad_norm": 0.16744007170200348, "learning_rate": 1.1480725516115374e-05, "loss": 0.441, "step": 6144 }, { "epoch": 1.370733883560116, "grad_norm": 0.16667571663856506, "learning_rate": 1.1478397988944683e-05, "loss": 0.4253, "step": 6145 }, { "epoch": 1.3709569484720054, "grad_norm": 0.1638866513967514, "learning_rate": 1.1476070379891009e-05, "loss": 0.4539, "step": 6146 }, { "epoch": 1.3711800133838947, "grad_norm": 0.2004345804452896, "learning_rate": 1.1473742689083271e-05, "loss": 0.4615, "step": 6147 }, { "epoch": 1.371403078295784, "grad_norm": 0.17425598204135895, "learning_rate": 1.1471414916650397e-05, "loss": 0.4428, "step": 6148 }, { "epoch": 1.3716261432076735, "grad_norm": 0.1777925044298172, "learning_rate": 1.1469087062721305e-05, "loss": 0.4741, "step": 6149 }, { "epoch": 1.3718492081195628, "grad_norm": 0.17382198572158813, "learning_rate": 1.146675912742493e-05, "loss": 0.4744, "step": 6150 }, { "epoch": 1.372072273031452, "grad_norm": 0.18798965215682983, "learning_rate": 1.146443111089021e-05, "loss": 0.4216, "step": 6151 }, { "epoch": 1.3722953379433416, "grad_norm": 0.1665268987417221, "learning_rate": 1.1462103013246086e-05, "loss": 0.4669, "step": 6152 }, { "epoch": 1.3725184028552309, "grad_norm": 0.1658693253993988, "learning_rate": 1.1459774834621498e-05, "loss": 0.444, "step": 6153 }, { "epoch": 1.3727414677671201, "grad_norm": 0.16875915229320526, "learning_rate": 1.1457446575145397e-05, "loss": 0.46, "step": 6154 }, { "epoch": 1.3729645326790096, "grad_norm": 0.17175830900669098, "learning_rate": 1.1455118234946737e-05, "loss": 0.4879, "step": 6155 }, { "epoch": 1.373187597590899, "grad_norm": 0.17912839353084564, "learning_rate": 1.1452789814154475e-05, "loss": 0.4488, "step": 6156 }, { "epoch": 1.3734106625027884, "grad_norm": 0.16743507981300354, "learning_rate": 1.1450461312897576e-05, "loss": 0.4565, "step": 6157 }, { "epoch": 1.3736337274146777, "grad_norm": 0.1700303554534912, "learning_rate": 1.1448132731305005e-05, "loss": 0.4278, "step": 6158 }, { "epoch": 1.373856792326567, "grad_norm": 0.17981837689876556, "learning_rate": 1.1445804069505735e-05, "loss": 0.4379, "step": 6159 }, { "epoch": 1.3740798572384563, "grad_norm": 0.172749325633049, "learning_rate": 1.1443475327628739e-05, "loss": 0.4344, "step": 6160 }, { "epoch": 1.3743029221503458, "grad_norm": 0.16664694249629974, "learning_rate": 1.1441146505803003e-05, "loss": 0.4574, "step": 6161 }, { "epoch": 1.374525987062235, "grad_norm": 0.1706552803516388, "learning_rate": 1.1438817604157506e-05, "loss": 0.4525, "step": 6162 }, { "epoch": 1.3747490519741246, "grad_norm": 0.17497579753398895, "learning_rate": 1.1436488622821243e-05, "loss": 0.4882, "step": 6163 }, { "epoch": 1.3749721168860138, "grad_norm": 0.17119908332824707, "learning_rate": 1.14341595619232e-05, "loss": 0.4728, "step": 6164 }, { "epoch": 1.3751951817979031, "grad_norm": 0.1758640706539154, "learning_rate": 1.143183042159238e-05, "loss": 0.4205, "step": 6165 }, { "epoch": 1.3754182467097926, "grad_norm": 0.17280808091163635, "learning_rate": 1.1429501201957785e-05, "loss": 0.4964, "step": 6166 }, { "epoch": 1.375641311621682, "grad_norm": 0.17379097640514374, "learning_rate": 1.1427171903148425e-05, "loss": 0.4624, "step": 6167 }, { "epoch": 1.3758643765335712, "grad_norm": 0.17071013152599335, "learning_rate": 1.14248425252933e-05, "loss": 0.4503, "step": 6168 }, { "epoch": 1.3760874414454607, "grad_norm": 0.18627150356769562, "learning_rate": 1.1422513068521442e-05, "loss": 0.4476, "step": 6169 }, { "epoch": 1.37631050635735, "grad_norm": 0.1711094081401825, "learning_rate": 1.1420183532961855e-05, "loss": 0.445, "step": 6170 }, { "epoch": 1.3765335712692393, "grad_norm": 0.16976723074913025, "learning_rate": 1.1417853918743576e-05, "loss": 0.4638, "step": 6171 }, { "epoch": 1.3767566361811288, "grad_norm": 0.15716324746608734, "learning_rate": 1.1415524225995624e-05, "loss": 0.4555, "step": 6172 }, { "epoch": 1.376979701093018, "grad_norm": 0.17595480382442474, "learning_rate": 1.1413194454847041e-05, "loss": 0.4777, "step": 6173 }, { "epoch": 1.3772027660049075, "grad_norm": 0.1665569245815277, "learning_rate": 1.1410864605426856e-05, "loss": 0.4478, "step": 6174 }, { "epoch": 1.3774258309167968, "grad_norm": 0.16966520249843597, "learning_rate": 1.1408534677864119e-05, "loss": 0.4626, "step": 6175 }, { "epoch": 1.377648895828686, "grad_norm": 0.17505811154842377, "learning_rate": 1.1406204672287867e-05, "loss": 0.4595, "step": 6176 }, { "epoch": 1.3778719607405754, "grad_norm": 0.1676858365535736, "learning_rate": 1.1403874588827156e-05, "loss": 0.4451, "step": 6177 }, { "epoch": 1.3780950256524649, "grad_norm": 0.1657474786043167, "learning_rate": 1.1401544427611037e-05, "loss": 0.4268, "step": 6178 }, { "epoch": 1.3783180905643542, "grad_norm": 0.16758085787296295, "learning_rate": 1.1399214188768574e-05, "loss": 0.4783, "step": 6179 }, { "epoch": 1.3785411554762437, "grad_norm": 0.17600210011005402, "learning_rate": 1.139688387242883e-05, "loss": 0.4546, "step": 6180 }, { "epoch": 1.378764220388133, "grad_norm": 0.17735408246517181, "learning_rate": 1.1394553478720868e-05, "loss": 0.4663, "step": 6181 }, { "epoch": 1.3789872853000222, "grad_norm": 0.17437157034873962, "learning_rate": 1.1392223007773764e-05, "loss": 0.4914, "step": 6182 }, { "epoch": 1.3792103502119117, "grad_norm": 0.16675308346748352, "learning_rate": 1.1389892459716589e-05, "loss": 0.463, "step": 6183 }, { "epoch": 1.379433415123801, "grad_norm": 0.17255185544490814, "learning_rate": 1.138756183467843e-05, "loss": 0.4716, "step": 6184 }, { "epoch": 1.3796564800356905, "grad_norm": 0.16526752710342407, "learning_rate": 1.1385231132788368e-05, "loss": 0.427, "step": 6185 }, { "epoch": 1.3798795449475798, "grad_norm": 0.16648603975772858, "learning_rate": 1.1382900354175494e-05, "loss": 0.4393, "step": 6186 }, { "epoch": 1.380102609859469, "grad_norm": 0.16615265607833862, "learning_rate": 1.1380569498968896e-05, "loss": 0.4493, "step": 6187 }, { "epoch": 1.3803256747713584, "grad_norm": 0.19119910895824432, "learning_rate": 1.1378238567297677e-05, "loss": 0.4583, "step": 6188 }, { "epoch": 1.3805487396832479, "grad_norm": 0.17033104598522186, "learning_rate": 1.1375907559290935e-05, "loss": 0.4919, "step": 6189 }, { "epoch": 1.3807718045951372, "grad_norm": 0.16820746660232544, "learning_rate": 1.1373576475077778e-05, "loss": 0.4316, "step": 6190 }, { "epoch": 1.3809948695070267, "grad_norm": 0.17084772884845734, "learning_rate": 1.1371245314787318e-05, "loss": 0.4225, "step": 6191 }, { "epoch": 1.381217934418916, "grad_norm": 0.18454383313655853, "learning_rate": 1.1368914078548666e-05, "loss": 0.4675, "step": 6192 }, { "epoch": 1.3814409993308052, "grad_norm": 0.3558218777179718, "learning_rate": 1.136658276649094e-05, "loss": 0.4507, "step": 6193 }, { "epoch": 1.3816640642426945, "grad_norm": 0.18921050429344177, "learning_rate": 1.1364251378743266e-05, "loss": 0.4656, "step": 6194 }, { "epoch": 1.381887129154584, "grad_norm": 0.1709054410457611, "learning_rate": 1.136191991543477e-05, "loss": 0.468, "step": 6195 }, { "epoch": 1.3821101940664733, "grad_norm": 0.17301930487155914, "learning_rate": 1.1359588376694577e-05, "loss": 0.4486, "step": 6196 }, { "epoch": 1.3823332589783628, "grad_norm": 0.17260199785232544, "learning_rate": 1.1357256762651834e-05, "loss": 0.4671, "step": 6197 }, { "epoch": 1.382556323890252, "grad_norm": 0.16965550184249878, "learning_rate": 1.135492507343567e-05, "loss": 0.4657, "step": 6198 }, { "epoch": 1.3827793888021414, "grad_norm": 0.17701901495456696, "learning_rate": 1.1352593309175233e-05, "loss": 0.4698, "step": 6199 }, { "epoch": 1.3830024537140309, "grad_norm": 0.16689814627170563, "learning_rate": 1.135026146999967e-05, "loss": 0.4606, "step": 6200 }, { "epoch": 1.3832255186259201, "grad_norm": 0.1691475808620453, "learning_rate": 1.1347929556038135e-05, "loss": 0.4287, "step": 6201 }, { "epoch": 1.3834485835378096, "grad_norm": 0.17036226391792297, "learning_rate": 1.1345597567419782e-05, "loss": 0.44, "step": 6202 }, { "epoch": 1.383671648449699, "grad_norm": 0.1688498854637146, "learning_rate": 1.1343265504273773e-05, "loss": 0.4621, "step": 6203 }, { "epoch": 1.3838947133615882, "grad_norm": 0.17137567698955536, "learning_rate": 1.1340933366729268e-05, "loss": 0.4415, "step": 6204 }, { "epoch": 1.3841177782734775, "grad_norm": 0.17283524572849274, "learning_rate": 1.1338601154915441e-05, "loss": 0.4303, "step": 6205 }, { "epoch": 1.384340843185367, "grad_norm": 0.1761488914489746, "learning_rate": 1.1336268868961459e-05, "loss": 0.4892, "step": 6206 }, { "epoch": 1.3845639080972563, "grad_norm": 0.1702503263950348, "learning_rate": 1.1333936508996503e-05, "loss": 0.4523, "step": 6207 }, { "epoch": 1.3847869730091458, "grad_norm": 0.17347446084022522, "learning_rate": 1.1331604075149753e-05, "loss": 0.4681, "step": 6208 }, { "epoch": 1.385010037921035, "grad_norm": 0.18203677237033844, "learning_rate": 1.1329271567550394e-05, "loss": 0.4697, "step": 6209 }, { "epoch": 1.3852331028329243, "grad_norm": 0.168579563498497, "learning_rate": 1.132693898632761e-05, "loss": 0.4579, "step": 6210 }, { "epoch": 1.3854561677448136, "grad_norm": 0.17098036408424377, "learning_rate": 1.1324606331610602e-05, "loss": 0.4451, "step": 6211 }, { "epoch": 1.3856792326567031, "grad_norm": 0.1700953096151352, "learning_rate": 1.1322273603528562e-05, "loss": 0.4446, "step": 6212 }, { "epoch": 1.3859022975685924, "grad_norm": 0.18041159212589264, "learning_rate": 1.1319940802210692e-05, "loss": 0.4943, "step": 6213 }, { "epoch": 1.386125362480482, "grad_norm": 0.17734865844249725, "learning_rate": 1.1317607927786201e-05, "loss": 0.4507, "step": 6214 }, { "epoch": 1.3863484273923712, "grad_norm": 0.18012134730815887, "learning_rate": 1.131527498038429e-05, "loss": 0.4572, "step": 6215 }, { "epoch": 1.3865714923042605, "grad_norm": 0.17000438272953033, "learning_rate": 1.131294196013418e-05, "loss": 0.4896, "step": 6216 }, { "epoch": 1.38679455721615, "grad_norm": 0.16048485040664673, "learning_rate": 1.1310608867165082e-05, "loss": 0.4398, "step": 6217 }, { "epoch": 1.3870176221280393, "grad_norm": 0.17833036184310913, "learning_rate": 1.1308275701606226e-05, "loss": 0.4706, "step": 6218 }, { "epoch": 1.3872406870399288, "grad_norm": 0.17137986421585083, "learning_rate": 1.130594246358683e-05, "loss": 0.4862, "step": 6219 }, { "epoch": 1.387463751951818, "grad_norm": 0.1729787439107895, "learning_rate": 1.1303609153236127e-05, "loss": 0.4607, "step": 6220 }, { "epoch": 1.3876868168637073, "grad_norm": 0.16867367923259735, "learning_rate": 1.1301275770683344e-05, "loss": 0.4671, "step": 6221 }, { "epoch": 1.3879098817755966, "grad_norm": 0.17126032710075378, "learning_rate": 1.1298942316057731e-05, "loss": 0.4162, "step": 6222 }, { "epoch": 1.388132946687486, "grad_norm": 0.16363003849983215, "learning_rate": 1.1296608789488515e-05, "loss": 0.4292, "step": 6223 }, { "epoch": 1.3883560115993754, "grad_norm": 0.16574527323246002, "learning_rate": 1.1294275191104952e-05, "loss": 0.4622, "step": 6224 }, { "epoch": 1.3885790765112649, "grad_norm": 0.16721156239509583, "learning_rate": 1.1291941521036286e-05, "loss": 0.4569, "step": 6225 }, { "epoch": 1.3888021414231542, "grad_norm": 0.16934387385845184, "learning_rate": 1.1289607779411775e-05, "loss": 0.4488, "step": 6226 }, { "epoch": 1.3890252063350434, "grad_norm": 0.16877540946006775, "learning_rate": 1.1287273966360673e-05, "loss": 0.456, "step": 6227 }, { "epoch": 1.3892482712469327, "grad_norm": 0.17397183179855347, "learning_rate": 1.1284940082012238e-05, "loss": 0.4727, "step": 6228 }, { "epoch": 1.3894713361588222, "grad_norm": 0.1647840440273285, "learning_rate": 1.128260612649574e-05, "loss": 0.4497, "step": 6229 }, { "epoch": 1.3896944010707115, "grad_norm": 0.1644977629184723, "learning_rate": 1.1280272099940446e-05, "loss": 0.4395, "step": 6230 }, { "epoch": 1.389917465982601, "grad_norm": 0.17503903806209564, "learning_rate": 1.1277938002475633e-05, "loss": 0.4515, "step": 6231 }, { "epoch": 1.3901405308944903, "grad_norm": 0.16988563537597656, "learning_rate": 1.127560383423057e-05, "loss": 0.4365, "step": 6232 }, { "epoch": 1.3903635958063796, "grad_norm": 0.1652899980545044, "learning_rate": 1.1273269595334547e-05, "loss": 0.4484, "step": 6233 }, { "epoch": 1.390586660718269, "grad_norm": 0.1692708283662796, "learning_rate": 1.1270935285916842e-05, "loss": 0.4472, "step": 6234 }, { "epoch": 1.3908097256301584, "grad_norm": 0.16943638026714325, "learning_rate": 1.1268600906106749e-05, "loss": 0.4454, "step": 6235 }, { "epoch": 1.3910327905420479, "grad_norm": 0.1718982458114624, "learning_rate": 1.1266266456033555e-05, "loss": 0.4305, "step": 6236 }, { "epoch": 1.3912558554539372, "grad_norm": 0.17046236991882324, "learning_rate": 1.1263931935826561e-05, "loss": 0.4383, "step": 6237 }, { "epoch": 1.3914789203658264, "grad_norm": 0.17139141261577606, "learning_rate": 1.1261597345615064e-05, "loss": 0.4437, "step": 6238 }, { "epoch": 1.3917019852777157, "grad_norm": 0.1709447205066681, "learning_rate": 1.1259262685528376e-05, "loss": 0.4938, "step": 6239 }, { "epoch": 1.3919250501896052, "grad_norm": 0.16337740421295166, "learning_rate": 1.1256927955695793e-05, "loss": 0.4651, "step": 6240 }, { "epoch": 1.3921481151014945, "grad_norm": 0.17332367599010468, "learning_rate": 1.1254593156246638e-05, "loss": 0.4803, "step": 6241 }, { "epoch": 1.392371180013384, "grad_norm": 0.17104537785053253, "learning_rate": 1.1252258287310219e-05, "loss": 0.4717, "step": 6242 }, { "epoch": 1.3925942449252733, "grad_norm": 0.16978146135807037, "learning_rate": 1.1249923349015859e-05, "loss": 0.4999, "step": 6243 }, { "epoch": 1.3928173098371626, "grad_norm": 0.17217592895030975, "learning_rate": 1.1247588341492884e-05, "loss": 0.4738, "step": 6244 }, { "epoch": 1.3930403747490518, "grad_norm": 0.17651988565921783, "learning_rate": 1.1245253264870616e-05, "loss": 0.441, "step": 6245 }, { "epoch": 1.3932634396609413, "grad_norm": 0.16616612672805786, "learning_rate": 1.1242918119278395e-05, "loss": 0.4334, "step": 6246 }, { "epoch": 1.3934865045728306, "grad_norm": 0.17374052107334137, "learning_rate": 1.1240582904845542e-05, "loss": 0.4464, "step": 6247 }, { "epoch": 1.3937095694847201, "grad_norm": 0.17393873631954193, "learning_rate": 1.1238247621701413e-05, "loss": 0.4438, "step": 6248 }, { "epoch": 1.3939326343966094, "grad_norm": 0.17229264974594116, "learning_rate": 1.1235912269975335e-05, "loss": 0.4775, "step": 6249 }, { "epoch": 1.3941556993084987, "grad_norm": 0.17282813787460327, "learning_rate": 1.1233576849796666e-05, "loss": 0.454, "step": 6250 }, { "epoch": 1.3943787642203882, "grad_norm": 0.18351981043815613, "learning_rate": 1.1231241361294747e-05, "loss": 0.4702, "step": 6251 }, { "epoch": 1.3946018291322775, "grad_norm": 0.17423385381698608, "learning_rate": 1.122890580459894e-05, "loss": 0.4506, "step": 6252 }, { "epoch": 1.394824894044167, "grad_norm": 0.17099998891353607, "learning_rate": 1.1226570179838596e-05, "loss": 0.4509, "step": 6253 }, { "epoch": 1.3950479589560563, "grad_norm": 0.16235356032848358, "learning_rate": 1.1224234487143085e-05, "loss": 0.4243, "step": 6254 }, { "epoch": 1.3952710238679455, "grad_norm": 0.17439930140972137, "learning_rate": 1.1221898726641762e-05, "loss": 0.4701, "step": 6255 }, { "epoch": 1.3954940887798348, "grad_norm": 0.16977347433567047, "learning_rate": 1.1219562898464006e-05, "loss": 0.4516, "step": 6256 }, { "epoch": 1.3957171536917243, "grad_norm": 0.16299669444561005, "learning_rate": 1.1217227002739181e-05, "loss": 0.4579, "step": 6257 }, { "epoch": 1.3959402186036136, "grad_norm": 0.17073673009872437, "learning_rate": 1.1214891039596673e-05, "loss": 0.4989, "step": 6258 }, { "epoch": 1.3961632835155031, "grad_norm": 0.16990354657173157, "learning_rate": 1.1212555009165852e-05, "loss": 0.436, "step": 6259 }, { "epoch": 1.3963863484273924, "grad_norm": 0.16700886189937592, "learning_rate": 1.1210218911576112e-05, "loss": 0.4324, "step": 6260 }, { "epoch": 1.3966094133392817, "grad_norm": 0.16864052414894104, "learning_rate": 1.1207882746956834e-05, "loss": 0.4834, "step": 6261 }, { "epoch": 1.396832478251171, "grad_norm": 0.16238147020339966, "learning_rate": 1.1205546515437413e-05, "loss": 0.4447, "step": 6262 }, { "epoch": 1.3970555431630605, "grad_norm": 0.1640692502260208, "learning_rate": 1.120321021714724e-05, "loss": 0.4539, "step": 6263 }, { "epoch": 1.3972786080749497, "grad_norm": 0.16666129231452942, "learning_rate": 1.1200873852215717e-05, "loss": 0.4434, "step": 6264 }, { "epoch": 1.3975016729868392, "grad_norm": 0.16352206468582153, "learning_rate": 1.1198537420772249e-05, "loss": 0.4345, "step": 6265 }, { "epoch": 1.3977247378987285, "grad_norm": 0.16906216740608215, "learning_rate": 1.1196200922946237e-05, "loss": 0.4753, "step": 6266 }, { "epoch": 1.3979478028106178, "grad_norm": 0.16937586665153503, "learning_rate": 1.1193864358867097e-05, "loss": 0.4545, "step": 6267 }, { "epoch": 1.3981708677225073, "grad_norm": 0.17257355153560638, "learning_rate": 1.1191527728664235e-05, "loss": 0.4722, "step": 6268 }, { "epoch": 1.3983939326343966, "grad_norm": 0.1705016791820526, "learning_rate": 1.1189191032467074e-05, "loss": 0.4647, "step": 6269 }, { "epoch": 1.398616997546286, "grad_norm": 0.23135609924793243, "learning_rate": 1.1186854270405035e-05, "loss": 0.4725, "step": 6270 }, { "epoch": 1.3988400624581754, "grad_norm": 0.17285971343517303, "learning_rate": 1.1184517442607538e-05, "loss": 0.4284, "step": 6271 }, { "epoch": 1.3990631273700647, "grad_norm": 0.17679868638515472, "learning_rate": 1.1182180549204013e-05, "loss": 0.4412, "step": 6272 }, { "epoch": 1.399286192281954, "grad_norm": 0.16253626346588135, "learning_rate": 1.1179843590323897e-05, "loss": 0.4321, "step": 6273 }, { "epoch": 1.3995092571938434, "grad_norm": 0.1678762137889862, "learning_rate": 1.1177506566096619e-05, "loss": 0.483, "step": 6274 }, { "epoch": 1.3997323221057327, "grad_norm": 0.16577322781085968, "learning_rate": 1.1175169476651622e-05, "loss": 0.4419, "step": 6275 }, { "epoch": 1.3999553870176222, "grad_norm": 0.16397406160831451, "learning_rate": 1.1172832322118346e-05, "loss": 0.4529, "step": 6276 }, { "epoch": 1.4001784519295115, "grad_norm": 0.17259271442890167, "learning_rate": 1.1170495102626238e-05, "loss": 0.4763, "step": 6277 }, { "epoch": 1.4004015168414008, "grad_norm": 0.16409693658351898, "learning_rate": 1.116815781830475e-05, "loss": 0.4737, "step": 6278 }, { "epoch": 1.40062458175329, "grad_norm": 0.17240269482135773, "learning_rate": 1.1165820469283333e-05, "loss": 0.4708, "step": 6279 }, { "epoch": 1.4008476466651796, "grad_norm": 0.16498403251171112, "learning_rate": 1.1163483055691447e-05, "loss": 0.4387, "step": 6280 }, { "epoch": 1.4010707115770689, "grad_norm": 0.16875067353248596, "learning_rate": 1.116114557765855e-05, "loss": 0.4768, "step": 6281 }, { "epoch": 1.4012937764889584, "grad_norm": 0.16360358893871307, "learning_rate": 1.1158808035314105e-05, "loss": 0.4598, "step": 6282 }, { "epoch": 1.4015168414008476, "grad_norm": 0.1615404486656189, "learning_rate": 1.1156470428787582e-05, "loss": 0.4529, "step": 6283 }, { "epoch": 1.401739906312737, "grad_norm": 0.16733388602733612, "learning_rate": 1.1154132758208456e-05, "loss": 0.4564, "step": 6284 }, { "epoch": 1.4019629712246264, "grad_norm": 0.176735982298851, "learning_rate": 1.1151795023706194e-05, "loss": 0.4371, "step": 6285 }, { "epoch": 1.4021860361365157, "grad_norm": 0.17406433820724487, "learning_rate": 1.1149457225410281e-05, "loss": 0.4602, "step": 6286 }, { "epoch": 1.4024091010484052, "grad_norm": 0.18209131062030792, "learning_rate": 1.1147119363450197e-05, "loss": 0.4559, "step": 6287 }, { "epoch": 1.4026321659602945, "grad_norm": 0.17238366603851318, "learning_rate": 1.1144781437955426e-05, "loss": 0.4725, "step": 6288 }, { "epoch": 1.4028552308721838, "grad_norm": 0.16568414866924286, "learning_rate": 1.1142443449055455e-05, "loss": 0.4291, "step": 6289 }, { "epoch": 1.403078295784073, "grad_norm": 0.17432232201099396, "learning_rate": 1.1140105396879783e-05, "loss": 0.4827, "step": 6290 }, { "epoch": 1.4033013606959626, "grad_norm": 0.17041410505771637, "learning_rate": 1.11377672815579e-05, "loss": 0.4523, "step": 6291 }, { "epoch": 1.4035244256078518, "grad_norm": 0.1681598722934723, "learning_rate": 1.113542910321931e-05, "loss": 0.4588, "step": 6292 }, { "epoch": 1.4037474905197413, "grad_norm": 0.16634954512119293, "learning_rate": 1.1133090861993514e-05, "loss": 0.4306, "step": 6293 }, { "epoch": 1.4039705554316306, "grad_norm": 0.1695157140493393, "learning_rate": 1.113075255801002e-05, "loss": 0.4532, "step": 6294 }, { "epoch": 1.40419362034352, "grad_norm": 0.16797398030757904, "learning_rate": 1.1128414191398333e-05, "loss": 0.4452, "step": 6295 }, { "epoch": 1.4044166852554092, "grad_norm": 0.1665017306804657, "learning_rate": 1.1126075762287972e-05, "loss": 0.4523, "step": 6296 }, { "epoch": 1.4046397501672987, "grad_norm": 0.17001719772815704, "learning_rate": 1.1123737270808452e-05, "loss": 0.4472, "step": 6297 }, { "epoch": 1.404862815079188, "grad_norm": 0.16899511218070984, "learning_rate": 1.1121398717089294e-05, "loss": 0.4753, "step": 6298 }, { "epoch": 1.4050858799910775, "grad_norm": 0.16389401257038116, "learning_rate": 1.1119060101260021e-05, "loss": 0.4494, "step": 6299 }, { "epoch": 1.4053089449029668, "grad_norm": 0.16568145155906677, "learning_rate": 1.1116721423450158e-05, "loss": 0.4509, "step": 6300 }, { "epoch": 1.405532009814856, "grad_norm": 0.16839419305324554, "learning_rate": 1.1114382683789241e-05, "loss": 0.4425, "step": 6301 }, { "epoch": 1.4057550747267455, "grad_norm": 0.17624156177043915, "learning_rate": 1.1112043882406802e-05, "loss": 0.4668, "step": 6302 }, { "epoch": 1.4059781396386348, "grad_norm": 0.2432551085948944, "learning_rate": 1.1109705019432378e-05, "loss": 0.4504, "step": 6303 }, { "epoch": 1.4062012045505243, "grad_norm": 0.17527322471141815, "learning_rate": 1.1107366094995506e-05, "loss": 0.4734, "step": 6304 }, { "epoch": 1.4064242694624136, "grad_norm": 0.16600951552391052, "learning_rate": 1.1105027109225737e-05, "loss": 0.4789, "step": 6305 }, { "epoch": 1.406647334374303, "grad_norm": 0.20400017499923706, "learning_rate": 1.1102688062252614e-05, "loss": 0.4688, "step": 6306 }, { "epoch": 1.4068703992861922, "grad_norm": 0.17171353101730347, "learning_rate": 1.110034895420569e-05, "loss": 0.4374, "step": 6307 }, { "epoch": 1.4070934641980817, "grad_norm": 0.1854102462530136, "learning_rate": 1.1098009785214523e-05, "loss": 0.4255, "step": 6308 }, { "epoch": 1.407316529109971, "grad_norm": 0.16316324472427368, "learning_rate": 1.1095670555408662e-05, "loss": 0.4676, "step": 6309 }, { "epoch": 1.4075395940218605, "grad_norm": 0.18751972913742065, "learning_rate": 1.1093331264917676e-05, "loss": 0.4734, "step": 6310 }, { "epoch": 1.4077626589337497, "grad_norm": 0.22429490089416504, "learning_rate": 1.1090991913871128e-05, "loss": 0.444, "step": 6311 }, { "epoch": 1.407985723845639, "grad_norm": 0.16278603672981262, "learning_rate": 1.1088652502398585e-05, "loss": 0.4528, "step": 6312 }, { "epoch": 1.4082087887575283, "grad_norm": 0.1643955558538437, "learning_rate": 1.108631303062962e-05, "loss": 0.4646, "step": 6313 }, { "epoch": 1.4084318536694178, "grad_norm": 0.17145782709121704, "learning_rate": 1.1083973498693802e-05, "loss": 0.4592, "step": 6314 }, { "epoch": 1.408654918581307, "grad_norm": 0.18193364143371582, "learning_rate": 1.1081633906720714e-05, "loss": 0.4932, "step": 6315 }, { "epoch": 1.4088779834931966, "grad_norm": 0.17122429609298706, "learning_rate": 1.1079294254839941e-05, "loss": 0.4708, "step": 6316 }, { "epoch": 1.4091010484050859, "grad_norm": 0.1604917347431183, "learning_rate": 1.1076954543181058e-05, "loss": 0.4659, "step": 6317 }, { "epoch": 1.4093241133169752, "grad_norm": 0.1739075481891632, "learning_rate": 1.1074614771873661e-05, "loss": 0.4714, "step": 6318 }, { "epoch": 1.4095471782288647, "grad_norm": 0.16061022877693176, "learning_rate": 1.1072274941047336e-05, "loss": 0.4414, "step": 6319 }, { "epoch": 1.409770243140754, "grad_norm": 0.1809171438217163, "learning_rate": 1.1069935050831683e-05, "loss": 0.4467, "step": 6320 }, { "epoch": 1.4099933080526434, "grad_norm": 0.1696557253599167, "learning_rate": 1.1067595101356295e-05, "loss": 0.462, "step": 6321 }, { "epoch": 1.4102163729645327, "grad_norm": 0.1734270453453064, "learning_rate": 1.1065255092750774e-05, "loss": 0.4859, "step": 6322 }, { "epoch": 1.410439437876422, "grad_norm": 0.1748056411743164, "learning_rate": 1.1062915025144727e-05, "loss": 0.4709, "step": 6323 }, { "epoch": 1.4106625027883113, "grad_norm": 0.17737674713134766, "learning_rate": 1.106057489866776e-05, "loss": 0.4875, "step": 6324 }, { "epoch": 1.4108855677002008, "grad_norm": 0.16855676472187042, "learning_rate": 1.105823471344948e-05, "loss": 0.4713, "step": 6325 }, { "epoch": 1.41110863261209, "grad_norm": 0.17489537596702576, "learning_rate": 1.105589446961951e-05, "loss": 0.4678, "step": 6326 }, { "epoch": 1.4113316975239796, "grad_norm": 0.19364696741104126, "learning_rate": 1.1053554167307458e-05, "loss": 0.4732, "step": 6327 }, { "epoch": 1.4115547624358689, "grad_norm": 0.17721161246299744, "learning_rate": 1.1051213806642951e-05, "loss": 0.4753, "step": 6328 }, { "epoch": 1.4117778273477581, "grad_norm": 0.17709079384803772, "learning_rate": 1.1048873387755615e-05, "loss": 0.4453, "step": 6329 }, { "epoch": 1.4120008922596474, "grad_norm": 0.1669922173023224, "learning_rate": 1.1046532910775068e-05, "loss": 0.4639, "step": 6330 }, { "epoch": 1.412223957171537, "grad_norm": 0.17192141711711884, "learning_rate": 1.1044192375830946e-05, "loss": 0.4559, "step": 6331 }, { "epoch": 1.4124470220834262, "grad_norm": 0.16646385192871094, "learning_rate": 1.1041851783052882e-05, "loss": 0.4393, "step": 6332 }, { "epoch": 1.4126700869953157, "grad_norm": 0.1777653992176056, "learning_rate": 1.1039511132570516e-05, "loss": 0.4681, "step": 6333 }, { "epoch": 1.412893151907205, "grad_norm": 0.17389416694641113, "learning_rate": 1.1037170424513482e-05, "loss": 0.4644, "step": 6334 }, { "epoch": 1.4131162168190943, "grad_norm": 0.17686106264591217, "learning_rate": 1.1034829659011426e-05, "loss": 0.4946, "step": 6335 }, { "epoch": 1.4133392817309838, "grad_norm": 0.17192257940769196, "learning_rate": 1.1032488836193994e-05, "loss": 0.4583, "step": 6336 }, { "epoch": 1.413562346642873, "grad_norm": 0.17798608541488647, "learning_rate": 1.1030147956190835e-05, "loss": 0.453, "step": 6337 }, { "epoch": 1.4137854115547626, "grad_norm": 0.18982993066310883, "learning_rate": 1.1027807019131605e-05, "loss": 0.448, "step": 6338 }, { "epoch": 1.4140084764666518, "grad_norm": 0.17006950080394745, "learning_rate": 1.1025466025145955e-05, "loss": 0.4625, "step": 6339 }, { "epoch": 1.4142315413785411, "grad_norm": 0.16911807656288147, "learning_rate": 1.1023124974363546e-05, "loss": 0.4514, "step": 6340 }, { "epoch": 1.4144546062904304, "grad_norm": 0.16757598519325256, "learning_rate": 1.1020783866914042e-05, "loss": 0.4415, "step": 6341 }, { "epoch": 1.41467767120232, "grad_norm": 0.18296495079994202, "learning_rate": 1.1018442702927104e-05, "loss": 0.4786, "step": 6342 }, { "epoch": 1.4149007361142092, "grad_norm": 0.16517338156700134, "learning_rate": 1.1016101482532404e-05, "loss": 0.432, "step": 6343 }, { "epoch": 1.4151238010260987, "grad_norm": 0.16575907170772552, "learning_rate": 1.1013760205859611e-05, "loss": 0.4495, "step": 6344 }, { "epoch": 1.415346865937988, "grad_norm": 0.17554599046707153, "learning_rate": 1.1011418873038404e-05, "loss": 0.4354, "step": 6345 }, { "epoch": 1.4155699308498773, "grad_norm": 0.17527008056640625, "learning_rate": 1.1009077484198456e-05, "loss": 0.4729, "step": 6346 }, { "epoch": 1.4157929957617665, "grad_norm": 0.1661120504140854, "learning_rate": 1.100673603946945e-05, "loss": 0.456, "step": 6347 }, { "epoch": 1.416016060673656, "grad_norm": 0.16820774972438812, "learning_rate": 1.1004394538981069e-05, "loss": 0.4675, "step": 6348 }, { "epoch": 1.4162391255855453, "grad_norm": 0.16874277591705322, "learning_rate": 1.1002052982863001e-05, "loss": 0.4414, "step": 6349 }, { "epoch": 1.4164621904974348, "grad_norm": 0.17940542101860046, "learning_rate": 1.0999711371244936e-05, "loss": 0.4615, "step": 6350 }, { "epoch": 1.416685255409324, "grad_norm": 0.1683221310377121, "learning_rate": 1.0997369704256566e-05, "loss": 0.4633, "step": 6351 }, { "epoch": 1.4169083203212134, "grad_norm": 0.17308497428894043, "learning_rate": 1.0995027982027588e-05, "loss": 0.4399, "step": 6352 }, { "epoch": 1.417131385233103, "grad_norm": 0.17420804500579834, "learning_rate": 1.0992686204687701e-05, "loss": 0.4701, "step": 6353 }, { "epoch": 1.4173544501449922, "grad_norm": 0.1695612072944641, "learning_rate": 1.0990344372366611e-05, "loss": 0.4307, "step": 6354 }, { "epoch": 1.4175775150568817, "grad_norm": 0.17334359884262085, "learning_rate": 1.0988002485194016e-05, "loss": 0.4673, "step": 6355 }, { "epoch": 1.417800579968771, "grad_norm": 0.1705588549375534, "learning_rate": 1.0985660543299632e-05, "loss": 0.4589, "step": 6356 }, { "epoch": 1.4180236448806602, "grad_norm": 0.1716470867395401, "learning_rate": 1.0983318546813164e-05, "loss": 0.4645, "step": 6357 }, { "epoch": 1.4182467097925495, "grad_norm": 0.17242403328418732, "learning_rate": 1.098097649586433e-05, "loss": 0.4401, "step": 6358 }, { "epoch": 1.418469774704439, "grad_norm": 0.1679268628358841, "learning_rate": 1.0978634390582847e-05, "loss": 0.46, "step": 6359 }, { "epoch": 1.4186928396163283, "grad_norm": 0.17091570794582367, "learning_rate": 1.0976292231098435e-05, "loss": 0.4361, "step": 6360 }, { "epoch": 1.4189159045282178, "grad_norm": 0.17040546238422394, "learning_rate": 1.0973950017540823e-05, "loss": 0.4295, "step": 6361 }, { "epoch": 1.419138969440107, "grad_norm": 0.17108914256095886, "learning_rate": 1.0971607750039727e-05, "loss": 0.4581, "step": 6362 }, { "epoch": 1.4193620343519964, "grad_norm": 0.16708308458328247, "learning_rate": 1.0969265428724887e-05, "loss": 0.455, "step": 6363 }, { "epoch": 1.4195850992638857, "grad_norm": 0.16952864825725555, "learning_rate": 1.0966923053726025e-05, "loss": 0.4411, "step": 6364 }, { "epoch": 1.4198081641757752, "grad_norm": 0.16820164024829865, "learning_rate": 1.0964580625172887e-05, "loss": 0.4328, "step": 6365 }, { "epoch": 1.4200312290876644, "grad_norm": 0.16284674406051636, "learning_rate": 1.0962238143195203e-05, "loss": 0.4227, "step": 6366 }, { "epoch": 1.420254293999554, "grad_norm": 0.16655023396015167, "learning_rate": 1.0959895607922722e-05, "loss": 0.4568, "step": 6367 }, { "epoch": 1.4204773589114432, "grad_norm": 0.17234466969966888, "learning_rate": 1.095755301948518e-05, "loss": 0.4756, "step": 6368 }, { "epoch": 1.4207004238233325, "grad_norm": 0.1787555068731308, "learning_rate": 1.0955210378012331e-05, "loss": 0.4822, "step": 6369 }, { "epoch": 1.420923488735222, "grad_norm": 0.1660604625940323, "learning_rate": 1.0952867683633922e-05, "loss": 0.4712, "step": 6370 }, { "epoch": 1.4211465536471113, "grad_norm": 0.19277963042259216, "learning_rate": 1.0950524936479708e-05, "loss": 0.4813, "step": 6371 }, { "epoch": 1.4213696185590008, "grad_norm": 0.16839838027954102, "learning_rate": 1.0948182136679442e-05, "loss": 0.411, "step": 6372 }, { "epoch": 1.42159268347089, "grad_norm": 0.18013301491737366, "learning_rate": 1.0945839284362885e-05, "loss": 0.4256, "step": 6373 }, { "epoch": 1.4218157483827794, "grad_norm": 0.16745387017726898, "learning_rate": 1.09434963796598e-05, "loss": 0.4408, "step": 6374 }, { "epoch": 1.4220388132946686, "grad_norm": 0.16723443567752838, "learning_rate": 1.094115342269995e-05, "loss": 0.4702, "step": 6375 }, { "epoch": 1.4222618782065581, "grad_norm": 0.17772428691387177, "learning_rate": 1.0938810413613103e-05, "loss": 0.4401, "step": 6376 }, { "epoch": 1.4224849431184474, "grad_norm": 0.17869889736175537, "learning_rate": 1.0936467352529032e-05, "loss": 0.4536, "step": 6377 }, { "epoch": 1.422708008030337, "grad_norm": 0.18365752696990967, "learning_rate": 1.0934124239577506e-05, "loss": 0.4603, "step": 6378 }, { "epoch": 1.4229310729422262, "grad_norm": 0.18287013471126556, "learning_rate": 1.0931781074888306e-05, "loss": 0.4501, "step": 6379 }, { "epoch": 1.4231541378541155, "grad_norm": 0.21559575200080872, "learning_rate": 1.0929437858591207e-05, "loss": 0.4447, "step": 6380 }, { "epoch": 1.423377202766005, "grad_norm": 0.189785435795784, "learning_rate": 1.0927094590815992e-05, "loss": 0.4608, "step": 6381 }, { "epoch": 1.4236002676778943, "grad_norm": 0.17252789437770844, "learning_rate": 1.092475127169245e-05, "loss": 0.4806, "step": 6382 }, { "epoch": 1.4238233325897836, "grad_norm": 0.16337281465530396, "learning_rate": 1.0922407901350365e-05, "loss": 0.4523, "step": 6383 }, { "epoch": 1.424046397501673, "grad_norm": 0.17701326310634613, "learning_rate": 1.0920064479919527e-05, "loss": 0.4629, "step": 6384 }, { "epoch": 1.4242694624135623, "grad_norm": 0.17202292382717133, "learning_rate": 1.0917721007529731e-05, "loss": 0.4646, "step": 6385 }, { "epoch": 1.4244925273254516, "grad_norm": 0.18118809163570404, "learning_rate": 1.0915377484310774e-05, "loss": 0.4814, "step": 6386 }, { "epoch": 1.4247155922373411, "grad_norm": 0.1883123368024826, "learning_rate": 1.0913033910392452e-05, "loss": 0.4767, "step": 6387 }, { "epoch": 1.4249386571492304, "grad_norm": 0.16664288938045502, "learning_rate": 1.0910690285904573e-05, "loss": 0.429, "step": 6388 }, { "epoch": 1.42516172206112, "grad_norm": 0.17694927752017975, "learning_rate": 1.0908346610976934e-05, "loss": 0.4506, "step": 6389 }, { "epoch": 1.4253847869730092, "grad_norm": 0.17796604335308075, "learning_rate": 1.0906002885739348e-05, "loss": 0.4564, "step": 6390 }, { "epoch": 1.4256078518848985, "grad_norm": 0.1945108026266098, "learning_rate": 1.090365911032162e-05, "loss": 0.4479, "step": 6391 }, { "epoch": 1.4258309167967878, "grad_norm": 0.1725914031267166, "learning_rate": 1.0901315284853566e-05, "loss": 0.4606, "step": 6392 }, { "epoch": 1.4260539817086773, "grad_norm": 0.16722865402698517, "learning_rate": 1.0898971409465006e-05, "loss": 0.4595, "step": 6393 }, { "epoch": 1.4262770466205665, "grad_norm": 0.1750846952199936, "learning_rate": 1.0896627484285752e-05, "loss": 0.4485, "step": 6394 }, { "epoch": 1.426500111532456, "grad_norm": 0.1740426868200302, "learning_rate": 1.0894283509445629e-05, "loss": 0.4627, "step": 6395 }, { "epoch": 1.4267231764443453, "grad_norm": 0.17253148555755615, "learning_rate": 1.0891939485074459e-05, "loss": 0.4481, "step": 6396 }, { "epoch": 1.4269462413562346, "grad_norm": 0.17242968082427979, "learning_rate": 1.088959541130207e-05, "loss": 0.4573, "step": 6397 }, { "epoch": 1.427169306268124, "grad_norm": 0.1693577617406845, "learning_rate": 1.0887251288258291e-05, "loss": 0.461, "step": 6398 }, { "epoch": 1.4273923711800134, "grad_norm": 0.17257948219776154, "learning_rate": 1.0884907116072956e-05, "loss": 0.4735, "step": 6399 }, { "epoch": 1.4276154360919027, "grad_norm": 0.15882723033428192, "learning_rate": 1.0882562894875897e-05, "loss": 0.4495, "step": 6400 }, { "epoch": 1.4278385010037922, "grad_norm": 0.16930413246154785, "learning_rate": 1.0880218624796954e-05, "loss": 0.4321, "step": 6401 }, { "epoch": 1.4280615659156815, "grad_norm": 0.1744142770767212, "learning_rate": 1.0877874305965968e-05, "loss": 0.4472, "step": 6402 }, { "epoch": 1.4282846308275707, "grad_norm": 0.1769794076681137, "learning_rate": 1.0875529938512779e-05, "loss": 0.475, "step": 6403 }, { "epoch": 1.4285076957394602, "grad_norm": 0.1765398234128952, "learning_rate": 1.0873185522567236e-05, "loss": 0.4595, "step": 6404 }, { "epoch": 1.4287307606513495, "grad_norm": 0.1690848469734192, "learning_rate": 1.0870841058259185e-05, "loss": 0.4647, "step": 6405 }, { "epoch": 1.428953825563239, "grad_norm": 0.17777849733829498, "learning_rate": 1.086849654571848e-05, "loss": 0.4644, "step": 6406 }, { "epoch": 1.4291768904751283, "grad_norm": 0.17765332758426666, "learning_rate": 1.0866151985074973e-05, "loss": 0.4528, "step": 6407 }, { "epoch": 1.4293999553870176, "grad_norm": 0.1740536242723465, "learning_rate": 1.0863807376458516e-05, "loss": 0.4675, "step": 6408 }, { "epoch": 1.4296230202989069, "grad_norm": 0.16334398090839386, "learning_rate": 1.0861462719998981e-05, "loss": 0.4409, "step": 6409 }, { "epoch": 1.4298460852107964, "grad_norm": 0.17333824932575226, "learning_rate": 1.0859118015826216e-05, "loss": 0.4712, "step": 6410 }, { "epoch": 1.4300691501226857, "grad_norm": 0.17034272849559784, "learning_rate": 1.0856773264070092e-05, "loss": 0.4635, "step": 6411 }, { "epoch": 1.4302922150345752, "grad_norm": 0.16784878075122833, "learning_rate": 1.0854428464860476e-05, "loss": 0.4438, "step": 6412 }, { "epoch": 1.4305152799464644, "grad_norm": 0.17363719642162323, "learning_rate": 1.0852083618327239e-05, "loss": 0.4789, "step": 6413 }, { "epoch": 1.4307383448583537, "grad_norm": 0.16398392617702484, "learning_rate": 1.084973872460025e-05, "loss": 0.4559, "step": 6414 }, { "epoch": 1.4309614097702432, "grad_norm": 0.17007873952388763, "learning_rate": 1.0847393783809383e-05, "loss": 0.468, "step": 6415 }, { "epoch": 1.4311844746821325, "grad_norm": 0.1761980801820755, "learning_rate": 1.084504879608452e-05, "loss": 0.4525, "step": 6416 }, { "epoch": 1.4314075395940218, "grad_norm": 0.17042046785354614, "learning_rate": 1.084270376155554e-05, "loss": 0.4498, "step": 6417 }, { "epoch": 1.4316306045059113, "grad_norm": 0.1663273125886917, "learning_rate": 1.0840358680352324e-05, "loss": 0.4453, "step": 6418 }, { "epoch": 1.4318536694178006, "grad_norm": 0.1743524819612503, "learning_rate": 1.0838013552604758e-05, "loss": 0.481, "step": 6419 }, { "epoch": 1.4320767343296898, "grad_norm": 0.16921429336071014, "learning_rate": 1.083566837844273e-05, "loss": 0.4284, "step": 6420 }, { "epoch": 1.4322997992415794, "grad_norm": 0.1793074607849121, "learning_rate": 1.083332315799613e-05, "loss": 0.4842, "step": 6421 }, { "epoch": 1.4325228641534686, "grad_norm": 0.16839145123958588, "learning_rate": 1.0830977891394853e-05, "loss": 0.4509, "step": 6422 }, { "epoch": 1.4327459290653581, "grad_norm": 0.1707042008638382, "learning_rate": 1.0828632578768794e-05, "loss": 0.446, "step": 6423 }, { "epoch": 1.4329689939772474, "grad_norm": 0.16985002160072327, "learning_rate": 1.0826287220247851e-05, "loss": 0.4856, "step": 6424 }, { "epoch": 1.4331920588891367, "grad_norm": 0.17573486268520355, "learning_rate": 1.082394181596192e-05, "loss": 0.4552, "step": 6425 }, { "epoch": 1.433415123801026, "grad_norm": 0.20836280286312103, "learning_rate": 1.0821596366040911e-05, "loss": 0.4532, "step": 6426 }, { "epoch": 1.4336381887129155, "grad_norm": 0.17805512249469757, "learning_rate": 1.0819250870614729e-05, "loss": 0.462, "step": 6427 }, { "epoch": 1.4338612536248048, "grad_norm": 0.17473873496055603, "learning_rate": 1.081690532981328e-05, "loss": 0.444, "step": 6428 }, { "epoch": 1.4340843185366943, "grad_norm": 0.17013612389564514, "learning_rate": 1.081455974376647e-05, "loss": 0.4578, "step": 6429 }, { "epoch": 1.4343073834485836, "grad_norm": 0.16665178537368774, "learning_rate": 1.0812214112604224e-05, "loss": 0.432, "step": 6430 }, { "epoch": 1.4345304483604728, "grad_norm": 0.1708722859621048, "learning_rate": 1.080986843645645e-05, "loss": 0.4473, "step": 6431 }, { "epoch": 1.4347535132723623, "grad_norm": 0.17868392169475555, "learning_rate": 1.0807522715453067e-05, "loss": 0.4696, "step": 6432 }, { "epoch": 1.4349765781842516, "grad_norm": 0.17720571160316467, "learning_rate": 1.0805176949723997e-05, "loss": 0.4708, "step": 6433 }, { "epoch": 1.435199643096141, "grad_norm": 0.1707199364900589, "learning_rate": 1.080283113939916e-05, "loss": 0.4668, "step": 6434 }, { "epoch": 1.4354227080080304, "grad_norm": 0.1737671196460724, "learning_rate": 1.0800485284608488e-05, "loss": 0.4539, "step": 6435 }, { "epoch": 1.4356457729199197, "grad_norm": 0.1636429876089096, "learning_rate": 1.0798139385481903e-05, "loss": 0.4321, "step": 6436 }, { "epoch": 1.435868837831809, "grad_norm": 0.1818714439868927, "learning_rate": 1.079579344214934e-05, "loss": 0.4698, "step": 6437 }, { "epoch": 1.4360919027436985, "grad_norm": 0.17777089774608612, "learning_rate": 1.0793447454740731e-05, "loss": 0.4674, "step": 6438 }, { "epoch": 1.4363149676555877, "grad_norm": 0.16908809542655945, "learning_rate": 1.079110142338601e-05, "loss": 0.4584, "step": 6439 }, { "epoch": 1.4365380325674773, "grad_norm": 0.1798996478319168, "learning_rate": 1.0788755348215114e-05, "loss": 0.4565, "step": 6440 }, { "epoch": 1.4367610974793665, "grad_norm": 0.1786443144083023, "learning_rate": 1.0786409229357991e-05, "loss": 0.4672, "step": 6441 }, { "epoch": 1.4369841623912558, "grad_norm": 0.1697261929512024, "learning_rate": 1.0784063066944572e-05, "loss": 0.4342, "step": 6442 }, { "epoch": 1.437207227303145, "grad_norm": 0.1702648103237152, "learning_rate": 1.0781716861104812e-05, "loss": 0.4487, "step": 6443 }, { "epoch": 1.4374302922150346, "grad_norm": 0.17532885074615479, "learning_rate": 1.0779370611968652e-05, "loss": 0.4431, "step": 6444 }, { "epoch": 1.4376533571269239, "grad_norm": 0.17340442538261414, "learning_rate": 1.0777024319666048e-05, "loss": 0.4404, "step": 6445 }, { "epoch": 1.4378764220388134, "grad_norm": 0.1587846279144287, "learning_rate": 1.0774677984326946e-05, "loss": 0.4275, "step": 6446 }, { "epoch": 1.4380994869507027, "grad_norm": 0.1758798211812973, "learning_rate": 1.0772331606081308e-05, "loss": 0.462, "step": 6447 }, { "epoch": 1.438322551862592, "grad_norm": 0.16627365350723267, "learning_rate": 1.0769985185059087e-05, "loss": 0.4244, "step": 6448 }, { "epoch": 1.4385456167744815, "grad_norm": 0.17225994169712067, "learning_rate": 1.0767638721390242e-05, "loss": 0.4572, "step": 6449 }, { "epoch": 1.4387686816863707, "grad_norm": 0.1741783767938614, "learning_rate": 1.0765292215204738e-05, "loss": 0.45, "step": 6450 }, { "epoch": 1.43899174659826, "grad_norm": 0.16373707354068756, "learning_rate": 1.0762945666632534e-05, "loss": 0.4508, "step": 6451 }, { "epoch": 1.4392148115101495, "grad_norm": 0.18814869225025177, "learning_rate": 1.0760599075803601e-05, "loss": 0.5056, "step": 6452 }, { "epoch": 1.4394378764220388, "grad_norm": 0.17694085836410522, "learning_rate": 1.0758252442847907e-05, "loss": 0.4423, "step": 6453 }, { "epoch": 1.439660941333928, "grad_norm": 0.17024989426136017, "learning_rate": 1.0755905767895425e-05, "loss": 0.4288, "step": 6454 }, { "epoch": 1.4398840062458176, "grad_norm": 0.1731920838356018, "learning_rate": 1.0753559051076123e-05, "loss": 0.4909, "step": 6455 }, { "epoch": 1.4401070711577069, "grad_norm": 0.17619867622852325, "learning_rate": 1.0751212292519983e-05, "loss": 0.4567, "step": 6456 }, { "epoch": 1.4403301360695964, "grad_norm": 0.512635350227356, "learning_rate": 1.0748865492356981e-05, "loss": 0.4643, "step": 6457 }, { "epoch": 1.4405532009814856, "grad_norm": 0.17129187285900116, "learning_rate": 1.0746518650717097e-05, "loss": 0.4453, "step": 6458 }, { "epoch": 1.440776265893375, "grad_norm": 0.16431821882724762, "learning_rate": 1.0744171767730315e-05, "loss": 0.4289, "step": 6459 }, { "epoch": 1.4409993308052642, "grad_norm": 0.18094895780086517, "learning_rate": 1.0741824843526619e-05, "loss": 0.4711, "step": 6460 }, { "epoch": 1.4412223957171537, "grad_norm": 0.19894392788410187, "learning_rate": 1.0739477878235996e-05, "loss": 0.4643, "step": 6461 }, { "epoch": 1.441445460629043, "grad_norm": 0.17579229176044464, "learning_rate": 1.073713087198844e-05, "loss": 0.4529, "step": 6462 }, { "epoch": 1.4416685255409325, "grad_norm": 0.1695978045463562, "learning_rate": 1.0734783824913935e-05, "loss": 0.4772, "step": 6463 }, { "epoch": 1.4418915904528218, "grad_norm": 0.17056889832019806, "learning_rate": 1.0732436737142482e-05, "loss": 0.4648, "step": 6464 }, { "epoch": 1.442114655364711, "grad_norm": 0.18249176442623138, "learning_rate": 1.0730089608804074e-05, "loss": 0.4555, "step": 6465 }, { "epoch": 1.4423377202766006, "grad_norm": 0.2158237099647522, "learning_rate": 1.0727742440028712e-05, "loss": 0.4276, "step": 6466 }, { "epoch": 1.4425607851884898, "grad_norm": 0.17240867018699646, "learning_rate": 1.0725395230946396e-05, "loss": 0.4629, "step": 6467 }, { "epoch": 1.4427838501003791, "grad_norm": 0.16894541680812836, "learning_rate": 1.072304798168713e-05, "loss": 0.4522, "step": 6468 }, { "epoch": 1.4430069150122686, "grad_norm": 0.17741340398788452, "learning_rate": 1.0720700692380918e-05, "loss": 0.4489, "step": 6469 }, { "epoch": 1.443229979924158, "grad_norm": 0.16370341181755066, "learning_rate": 1.0718353363157767e-05, "loss": 0.4731, "step": 6470 }, { "epoch": 1.4434530448360472, "grad_norm": 0.1746392399072647, "learning_rate": 1.0716005994147694e-05, "loss": 0.4832, "step": 6471 }, { "epoch": 1.4436761097479367, "grad_norm": 0.16757871210575104, "learning_rate": 1.0713658585480697e-05, "loss": 0.4523, "step": 6472 }, { "epoch": 1.443899174659826, "grad_norm": 0.18170872330665588, "learning_rate": 1.0711311137286804e-05, "loss": 0.4357, "step": 6473 }, { "epoch": 1.4441222395717155, "grad_norm": 0.17265670001506805, "learning_rate": 1.0708963649696023e-05, "loss": 0.4685, "step": 6474 }, { "epoch": 1.4443453044836048, "grad_norm": 0.15775421261787415, "learning_rate": 1.0706616122838379e-05, "loss": 0.427, "step": 6475 }, { "epoch": 1.444568369395494, "grad_norm": 0.18243922293186188, "learning_rate": 1.0704268556843884e-05, "loss": 0.4614, "step": 6476 }, { "epoch": 1.4447914343073833, "grad_norm": 0.17497234046459198, "learning_rate": 1.0701920951842568e-05, "loss": 0.4814, "step": 6477 }, { "epoch": 1.4450144992192728, "grad_norm": 0.1762491762638092, "learning_rate": 1.0699573307964457e-05, "loss": 0.458, "step": 6478 }, { "epoch": 1.4452375641311621, "grad_norm": 0.1725545972585678, "learning_rate": 1.0697225625339573e-05, "loss": 0.4371, "step": 6479 }, { "epoch": 1.4454606290430516, "grad_norm": 0.9475487470626831, "learning_rate": 1.0694877904097952e-05, "loss": 0.4609, "step": 6480 }, { "epoch": 1.445683693954941, "grad_norm": 0.21081972122192383, "learning_rate": 1.0692530144369615e-05, "loss": 0.4515, "step": 6481 }, { "epoch": 1.4459067588668302, "grad_norm": 0.1702289879322052, "learning_rate": 1.0690182346284608e-05, "loss": 0.4586, "step": 6482 }, { "epoch": 1.4461298237787197, "grad_norm": 0.18808738887310028, "learning_rate": 1.0687834509972958e-05, "loss": 0.4688, "step": 6483 }, { "epoch": 1.446352888690609, "grad_norm": 0.17759791016578674, "learning_rate": 1.068548663556471e-05, "loss": 0.4544, "step": 6484 }, { "epoch": 1.4465759536024985, "grad_norm": 0.1792079657316208, "learning_rate": 1.0683138723189897e-05, "loss": 0.4475, "step": 6485 }, { "epoch": 1.4467990185143877, "grad_norm": 0.17630550265312195, "learning_rate": 1.0680790772978566e-05, "loss": 0.4504, "step": 6486 }, { "epoch": 1.447022083426277, "grad_norm": 0.18616117537021637, "learning_rate": 1.0678442785060758e-05, "loss": 0.4455, "step": 6487 }, { "epoch": 1.4472451483381663, "grad_norm": 0.18077711760997772, "learning_rate": 1.0676094759566524e-05, "loss": 0.4937, "step": 6488 }, { "epoch": 1.4474682132500558, "grad_norm": 0.1756681501865387, "learning_rate": 1.0673746696625906e-05, "loss": 0.4585, "step": 6489 }, { "epoch": 1.447691278161945, "grad_norm": 0.17320303618907928, "learning_rate": 1.0671398596368961e-05, "loss": 0.4564, "step": 6490 }, { "epoch": 1.4479143430738346, "grad_norm": 0.17476066946983337, "learning_rate": 1.0669050458925736e-05, "loss": 0.4621, "step": 6491 }, { "epoch": 1.4481374079857239, "grad_norm": 0.1720237284898758, "learning_rate": 1.0666702284426289e-05, "loss": 0.4483, "step": 6492 }, { "epoch": 1.4483604728976132, "grad_norm": 0.1799459308385849, "learning_rate": 1.0664354073000676e-05, "loss": 0.4825, "step": 6493 }, { "epoch": 1.4485835378095024, "grad_norm": 0.173868328332901, "learning_rate": 1.0662005824778957e-05, "loss": 0.4586, "step": 6494 }, { "epoch": 1.448806602721392, "grad_norm": 0.17507266998291016, "learning_rate": 1.0659657539891189e-05, "loss": 0.453, "step": 6495 }, { "epoch": 1.4490296676332812, "grad_norm": 0.17579588294029236, "learning_rate": 1.0657309218467437e-05, "loss": 0.4606, "step": 6496 }, { "epoch": 1.4492527325451707, "grad_norm": 0.18230777978897095, "learning_rate": 1.0654960860637766e-05, "loss": 0.4721, "step": 6497 }, { "epoch": 1.44947579745706, "grad_norm": 0.1772591918706894, "learning_rate": 1.0652612466532242e-05, "loss": 0.4642, "step": 6498 }, { "epoch": 1.4496988623689493, "grad_norm": 0.17671100795269012, "learning_rate": 1.0650264036280935e-05, "loss": 0.4526, "step": 6499 }, { "epoch": 1.4499219272808388, "grad_norm": 0.1718316227197647, "learning_rate": 1.0647915570013916e-05, "loss": 0.4592, "step": 6500 }, { "epoch": 1.450144992192728, "grad_norm": 0.17270374298095703, "learning_rate": 1.0645567067861257e-05, "loss": 0.4722, "step": 6501 }, { "epoch": 1.4503680571046176, "grad_norm": 0.1727593094110489, "learning_rate": 1.0643218529953032e-05, "loss": 0.4519, "step": 6502 }, { "epoch": 1.4505911220165069, "grad_norm": 0.18215766549110413, "learning_rate": 1.0640869956419321e-05, "loss": 0.4584, "step": 6503 }, { "epoch": 1.4508141869283961, "grad_norm": 0.17123596370220184, "learning_rate": 1.0638521347390198e-05, "loss": 0.4829, "step": 6504 }, { "epoch": 1.4510372518402854, "grad_norm": 0.17143850028514862, "learning_rate": 1.063617270299575e-05, "loss": 0.4703, "step": 6505 }, { "epoch": 1.451260316752175, "grad_norm": 0.17444929480552673, "learning_rate": 1.0633824023366053e-05, "loss": 0.4424, "step": 6506 }, { "epoch": 1.4514833816640642, "grad_norm": 0.17457221448421478, "learning_rate": 1.0631475308631196e-05, "loss": 0.4759, "step": 6507 }, { "epoch": 1.4517064465759537, "grad_norm": 0.16844333708286285, "learning_rate": 1.0629126558921264e-05, "loss": 0.4622, "step": 6508 }, { "epoch": 1.451929511487843, "grad_norm": 0.17886194586753845, "learning_rate": 1.0626777774366347e-05, "loss": 0.4502, "step": 6509 }, { "epoch": 1.4521525763997323, "grad_norm": 0.17846544086933136, "learning_rate": 1.062442895509653e-05, "loss": 0.4472, "step": 6510 }, { "epoch": 1.4523756413116216, "grad_norm": 0.19240468740463257, "learning_rate": 1.0622080101241914e-05, "loss": 0.4895, "step": 6511 }, { "epoch": 1.452598706223511, "grad_norm": 0.17172129452228546, "learning_rate": 1.061973121293259e-05, "loss": 0.4443, "step": 6512 }, { "epoch": 1.4528217711354003, "grad_norm": 0.16445989906787872, "learning_rate": 1.0617382290298649e-05, "loss": 0.4089, "step": 6513 }, { "epoch": 1.4530448360472898, "grad_norm": 0.1770135909318924, "learning_rate": 1.0615033333470194e-05, "loss": 0.4726, "step": 6514 }, { "epoch": 1.4532679009591791, "grad_norm": 0.16954918205738068, "learning_rate": 1.0612684342577326e-05, "loss": 0.4364, "step": 6515 }, { "epoch": 1.4534909658710684, "grad_norm": 0.16754356026649475, "learning_rate": 1.0610335317750144e-05, "loss": 0.433, "step": 6516 }, { "epoch": 1.453714030782958, "grad_norm": 0.17707470059394836, "learning_rate": 1.0607986259118752e-05, "loss": 0.4817, "step": 6517 }, { "epoch": 1.4539370956948472, "grad_norm": 0.16972193121910095, "learning_rate": 1.0605637166813261e-05, "loss": 0.4376, "step": 6518 }, { "epoch": 1.4541601606067367, "grad_norm": 0.1755881905555725, "learning_rate": 1.0603288040963768e-05, "loss": 0.4354, "step": 6519 }, { "epoch": 1.454383225518626, "grad_norm": 0.17727094888687134, "learning_rate": 1.0600938881700394e-05, "loss": 0.4519, "step": 6520 }, { "epoch": 1.4546062904305153, "grad_norm": 0.17010506987571716, "learning_rate": 1.059858968915324e-05, "loss": 0.4579, "step": 6521 }, { "epoch": 1.4548293553424045, "grad_norm": 0.1777375340461731, "learning_rate": 1.0596240463452427e-05, "loss": 0.4646, "step": 6522 }, { "epoch": 1.455052420254294, "grad_norm": 0.17490847408771515, "learning_rate": 1.0593891204728064e-05, "loss": 0.4906, "step": 6523 }, { "epoch": 1.4552754851661833, "grad_norm": 0.16298337280750275, "learning_rate": 1.0591541913110273e-05, "loss": 0.4104, "step": 6524 }, { "epoch": 1.4554985500780728, "grad_norm": 0.1771763265132904, "learning_rate": 1.0589192588729167e-05, "loss": 0.4829, "step": 6525 }, { "epoch": 1.455721614989962, "grad_norm": 0.16941291093826294, "learning_rate": 1.0586843231714874e-05, "loss": 0.4469, "step": 6526 }, { "epoch": 1.4559446799018514, "grad_norm": 0.16429558396339417, "learning_rate": 1.0584493842197505e-05, "loss": 0.4358, "step": 6527 }, { "epoch": 1.4561677448137407, "grad_norm": 0.18524731695652008, "learning_rate": 1.0582144420307196e-05, "loss": 0.4476, "step": 6528 }, { "epoch": 1.4563908097256302, "grad_norm": 0.18424685299396515, "learning_rate": 1.0579794966174064e-05, "loss": 0.4621, "step": 6529 }, { "epoch": 1.4566138746375195, "grad_norm": 0.17425791919231415, "learning_rate": 1.0577445479928239e-05, "loss": 0.4229, "step": 6530 }, { "epoch": 1.456836939549409, "grad_norm": 0.1773643046617508, "learning_rate": 1.0575095961699856e-05, "loss": 0.4825, "step": 6531 }, { "epoch": 1.4570600044612982, "grad_norm": 0.1699049323797226, "learning_rate": 1.0572746411619034e-05, "loss": 0.448, "step": 6532 }, { "epoch": 1.4572830693731875, "grad_norm": 0.1901503950357437, "learning_rate": 1.0570396829815919e-05, "loss": 0.4498, "step": 6533 }, { "epoch": 1.457506134285077, "grad_norm": 0.1757887601852417, "learning_rate": 1.0568047216420636e-05, "loss": 0.4576, "step": 6534 }, { "epoch": 1.4577291991969663, "grad_norm": 0.17276814579963684, "learning_rate": 1.056569757156333e-05, "loss": 0.4685, "step": 6535 }, { "epoch": 1.4579522641088558, "grad_norm": 0.1706877201795578, "learning_rate": 1.0563347895374127e-05, "loss": 0.4404, "step": 6536 }, { "epoch": 1.458175329020745, "grad_norm": 0.1756119430065155, "learning_rate": 1.056099818798318e-05, "loss": 0.485, "step": 6537 }, { "epoch": 1.4583983939326344, "grad_norm": 0.17187528312206268, "learning_rate": 1.0558648449520621e-05, "loss": 0.4528, "step": 6538 }, { "epoch": 1.4586214588445237, "grad_norm": 0.17733196914196014, "learning_rate": 1.05562986801166e-05, "loss": 0.4607, "step": 6539 }, { "epoch": 1.4588445237564132, "grad_norm": 0.1726623773574829, "learning_rate": 1.0553948879901255e-05, "loss": 0.4593, "step": 6540 }, { "epoch": 1.4590675886683024, "grad_norm": 0.1699986606836319, "learning_rate": 1.0551599049004738e-05, "loss": 0.4614, "step": 6541 }, { "epoch": 1.459290653580192, "grad_norm": 0.17713840305805206, "learning_rate": 1.0549249187557196e-05, "loss": 0.4911, "step": 6542 }, { "epoch": 1.4595137184920812, "grad_norm": 0.17454893887043, "learning_rate": 1.054689929568878e-05, "loss": 0.469, "step": 6543 }, { "epoch": 1.4597367834039705, "grad_norm": 0.17392531037330627, "learning_rate": 1.054454937352964e-05, "loss": 0.4355, "step": 6544 }, { "epoch": 1.4599598483158598, "grad_norm": 0.16687026619911194, "learning_rate": 1.0542199421209931e-05, "loss": 0.456, "step": 6545 }, { "epoch": 1.4601829132277493, "grad_norm": 0.18140020966529846, "learning_rate": 1.0539849438859806e-05, "loss": 0.4495, "step": 6546 }, { "epoch": 1.4604059781396386, "grad_norm": 0.1847231388092041, "learning_rate": 1.0537499426609426e-05, "loss": 0.4382, "step": 6547 }, { "epoch": 1.460629043051528, "grad_norm": 0.17485037446022034, "learning_rate": 1.0535149384588943e-05, "loss": 0.428, "step": 6548 }, { "epoch": 1.4608521079634174, "grad_norm": 0.17149090766906738, "learning_rate": 1.0532799312928525e-05, "loss": 0.4624, "step": 6549 }, { "epoch": 1.4610751728753066, "grad_norm": 0.17815500497817993, "learning_rate": 1.0530449211758327e-05, "loss": 0.4581, "step": 6550 }, { "epoch": 1.4612982377871961, "grad_norm": 0.16746407747268677, "learning_rate": 1.0528099081208514e-05, "loss": 0.4839, "step": 6551 }, { "epoch": 1.4615213026990854, "grad_norm": 0.17022733390331268, "learning_rate": 1.0525748921409256e-05, "loss": 0.4636, "step": 6552 }, { "epoch": 1.461744367610975, "grad_norm": 0.1741877943277359, "learning_rate": 1.0523398732490712e-05, "loss": 0.4602, "step": 6553 }, { "epoch": 1.4619674325228642, "grad_norm": 0.16914743185043335, "learning_rate": 1.0521048514583057e-05, "loss": 0.4616, "step": 6554 }, { "epoch": 1.4621904974347535, "grad_norm": 0.16964775323867798, "learning_rate": 1.0518698267816454e-05, "loss": 0.4738, "step": 6555 }, { "epoch": 1.4624135623466428, "grad_norm": 0.17116379737854004, "learning_rate": 1.0516347992321081e-05, "loss": 0.4588, "step": 6556 }, { "epoch": 1.4626366272585323, "grad_norm": 0.22291259467601776, "learning_rate": 1.0513997688227107e-05, "loss": 0.4697, "step": 6557 }, { "epoch": 1.4628596921704216, "grad_norm": 0.18003083765506744, "learning_rate": 1.051164735566471e-05, "loss": 0.4628, "step": 6558 }, { "epoch": 1.463082757082311, "grad_norm": 0.1763797551393509, "learning_rate": 1.050929699476406e-05, "loss": 0.4565, "step": 6559 }, { "epoch": 1.4633058219942003, "grad_norm": 0.1819460391998291, "learning_rate": 1.0506946605655342e-05, "loss": 0.483, "step": 6560 }, { "epoch": 1.4635288869060896, "grad_norm": 0.18002116680145264, "learning_rate": 1.050459618846873e-05, "loss": 0.4485, "step": 6561 }, { "epoch": 1.463751951817979, "grad_norm": 0.1870141327381134, "learning_rate": 1.0502245743334409e-05, "loss": 0.4761, "step": 6562 }, { "epoch": 1.4639750167298684, "grad_norm": 0.16674816608428955, "learning_rate": 1.0499895270382558e-05, "loss": 0.4615, "step": 6563 }, { "epoch": 1.4641980816417577, "grad_norm": 0.18010953068733215, "learning_rate": 1.0497544769743362e-05, "loss": 0.4677, "step": 6564 }, { "epoch": 1.4644211465536472, "grad_norm": 0.17383818328380585, "learning_rate": 1.049519424154701e-05, "loss": 0.4165, "step": 6565 }, { "epoch": 1.4646442114655365, "grad_norm": 0.17807599902153015, "learning_rate": 1.0492843685923684e-05, "loss": 0.4671, "step": 6566 }, { "epoch": 1.4648672763774258, "grad_norm": 0.1714266836643219, "learning_rate": 1.0490493103003573e-05, "loss": 0.4445, "step": 6567 }, { "epoch": 1.4650903412893153, "grad_norm": 0.18563859164714813, "learning_rate": 1.048814249291687e-05, "loss": 0.4694, "step": 6568 }, { "epoch": 1.4653134062012045, "grad_norm": 0.1768602728843689, "learning_rate": 1.0485791855793768e-05, "loss": 0.4529, "step": 6569 }, { "epoch": 1.465536471113094, "grad_norm": 0.16756857931613922, "learning_rate": 1.0483441191764453e-05, "loss": 0.4739, "step": 6570 }, { "epoch": 1.4657595360249833, "grad_norm": 0.16969642043113708, "learning_rate": 1.0481090500959125e-05, "loss": 0.4815, "step": 6571 }, { "epoch": 1.4659826009368726, "grad_norm": 0.17080920934677124, "learning_rate": 1.047873978350798e-05, "loss": 0.476, "step": 6572 }, { "epoch": 1.4662056658487619, "grad_norm": 0.17539383471012115, "learning_rate": 1.0476389039541214e-05, "loss": 0.4739, "step": 6573 }, { "epoch": 1.4664287307606514, "grad_norm": 0.1617891639471054, "learning_rate": 1.0474038269189026e-05, "loss": 0.4419, "step": 6574 }, { "epoch": 1.4666517956725407, "grad_norm": 0.1763634830713272, "learning_rate": 1.0471687472581617e-05, "loss": 0.4513, "step": 6575 }, { "epoch": 1.4668748605844302, "grad_norm": 0.16819307208061218, "learning_rate": 1.046933664984919e-05, "loss": 0.4533, "step": 6576 }, { "epoch": 1.4670979254963195, "grad_norm": 0.16723337769508362, "learning_rate": 1.0466985801121948e-05, "loss": 0.4588, "step": 6577 }, { "epoch": 1.4673209904082087, "grad_norm": 0.16584204137325287, "learning_rate": 1.046463492653009e-05, "loss": 0.432, "step": 6578 }, { "epoch": 1.467544055320098, "grad_norm": 0.1695747673511505, "learning_rate": 1.0462284026203831e-05, "loss": 0.4584, "step": 6579 }, { "epoch": 1.4677671202319875, "grad_norm": 0.17204824090003967, "learning_rate": 1.0459933100273371e-05, "loss": 0.4653, "step": 6580 }, { "epoch": 1.4679901851438768, "grad_norm": 0.18302413821220398, "learning_rate": 1.0457582148868928e-05, "loss": 0.4231, "step": 6581 }, { "epoch": 1.4682132500557663, "grad_norm": 0.17187362909317017, "learning_rate": 1.0455231172120704e-05, "loss": 0.4444, "step": 6582 }, { "epoch": 1.4684363149676556, "grad_norm": 0.16787424683570862, "learning_rate": 1.0452880170158914e-05, "loss": 0.4393, "step": 6583 }, { "epoch": 1.4686593798795449, "grad_norm": 0.16989830136299133, "learning_rate": 1.0450529143113772e-05, "loss": 0.4475, "step": 6584 }, { "epoch": 1.4688824447914344, "grad_norm": 0.17312617599964142, "learning_rate": 1.0448178091115493e-05, "loss": 0.4664, "step": 6585 }, { "epoch": 1.4691055097033237, "grad_norm": 0.16683322191238403, "learning_rate": 1.0445827014294292e-05, "loss": 0.4654, "step": 6586 }, { "epoch": 1.4693285746152132, "grad_norm": 0.18341569602489471, "learning_rate": 1.0443475912780386e-05, "loss": 0.4659, "step": 6587 }, { "epoch": 1.4695516395271024, "grad_norm": 0.18148088455200195, "learning_rate": 1.0441124786703996e-05, "loss": 0.4771, "step": 6588 }, { "epoch": 1.4697747044389917, "grad_norm": 0.1746566742658615, "learning_rate": 1.0438773636195336e-05, "loss": 0.4601, "step": 6589 }, { "epoch": 1.469997769350881, "grad_norm": 0.1989150047302246, "learning_rate": 1.0436422461384636e-05, "loss": 0.4603, "step": 6590 }, { "epoch": 1.4702208342627705, "grad_norm": 0.1698281466960907, "learning_rate": 1.0434071262402114e-05, "loss": 0.4584, "step": 6591 }, { "epoch": 1.4704438991746598, "grad_norm": 0.1744074821472168, "learning_rate": 1.0431720039377998e-05, "loss": 0.4839, "step": 6592 }, { "epoch": 1.4706669640865493, "grad_norm": 0.1633480042219162, "learning_rate": 1.0429368792442507e-05, "loss": 0.4426, "step": 6593 }, { "epoch": 1.4708900289984386, "grad_norm": 0.17857415974140167, "learning_rate": 1.0427017521725873e-05, "loss": 0.4595, "step": 6594 }, { "epoch": 1.4711130939103279, "grad_norm": 0.1769183874130249, "learning_rate": 1.0424666227358323e-05, "loss": 0.4862, "step": 6595 }, { "epoch": 1.4713361588222171, "grad_norm": 0.16599570214748383, "learning_rate": 1.0422314909470082e-05, "loss": 0.4185, "step": 6596 }, { "epoch": 1.4715592237341066, "grad_norm": 0.16501422226428986, "learning_rate": 1.0419963568191389e-05, "loss": 0.4314, "step": 6597 }, { "epoch": 1.471782288645996, "grad_norm": 0.17242339253425598, "learning_rate": 1.041761220365247e-05, "loss": 0.4411, "step": 6598 }, { "epoch": 1.4720053535578854, "grad_norm": 0.1672823429107666, "learning_rate": 1.0415260815983561e-05, "loss": 0.4531, "step": 6599 }, { "epoch": 1.4722284184697747, "grad_norm": 0.1693340539932251, "learning_rate": 1.0412909405314896e-05, "loss": 0.4494, "step": 6600 }, { "epoch": 1.472451483381664, "grad_norm": 0.17069166898727417, "learning_rate": 1.0410557971776711e-05, "loss": 0.4587, "step": 6601 }, { "epoch": 1.4726745482935535, "grad_norm": 0.1749400794506073, "learning_rate": 1.0408206515499242e-05, "loss": 0.4258, "step": 6602 }, { "epoch": 1.4728976132054428, "grad_norm": 0.17911866307258606, "learning_rate": 1.0405855036612728e-05, "loss": 0.4718, "step": 6603 }, { "epoch": 1.4731206781173323, "grad_norm": 0.1690555214881897, "learning_rate": 1.040350353524741e-05, "loss": 0.4594, "step": 6604 }, { "epoch": 1.4733437430292216, "grad_norm": 0.1977461576461792, "learning_rate": 1.0401152011533531e-05, "loss": 0.4651, "step": 6605 }, { "epoch": 1.4735668079411108, "grad_norm": 0.17112137377262115, "learning_rate": 1.0398800465601327e-05, "loss": 0.47, "step": 6606 }, { "epoch": 1.4737898728530001, "grad_norm": 0.20035414397716522, "learning_rate": 1.0396448897581043e-05, "loss": 0.4539, "step": 6607 }, { "epoch": 1.4740129377648896, "grad_norm": 0.1666097790002823, "learning_rate": 1.0394097307602928e-05, "loss": 0.4328, "step": 6608 }, { "epoch": 1.474236002676779, "grad_norm": 0.17964059114456177, "learning_rate": 1.0391745695797226e-05, "loss": 0.4684, "step": 6609 }, { "epoch": 1.4744590675886684, "grad_norm": 0.1727062463760376, "learning_rate": 1.038939406229418e-05, "loss": 0.435, "step": 6610 }, { "epoch": 1.4746821325005577, "grad_norm": 0.18273860216140747, "learning_rate": 1.0387042407224046e-05, "loss": 0.4723, "step": 6611 }, { "epoch": 1.474905197412447, "grad_norm": 0.1780747026205063, "learning_rate": 1.0384690730717065e-05, "loss": 0.4544, "step": 6612 }, { "epoch": 1.4751282623243362, "grad_norm": 0.17232657968997955, "learning_rate": 1.0382339032903492e-05, "loss": 0.4507, "step": 6613 }, { "epoch": 1.4753513272362258, "grad_norm": 0.17787334322929382, "learning_rate": 1.037998731391358e-05, "loss": 0.4654, "step": 6614 }, { "epoch": 1.475574392148115, "grad_norm": 0.17883604764938354, "learning_rate": 1.0377635573877581e-05, "loss": 0.4569, "step": 6615 }, { "epoch": 1.4757974570600045, "grad_norm": 0.172362819314003, "learning_rate": 1.0375283812925745e-05, "loss": 0.4433, "step": 6616 }, { "epoch": 1.4760205219718938, "grad_norm": 0.17884163558483124, "learning_rate": 1.037293203118833e-05, "loss": 0.437, "step": 6617 }, { "epoch": 1.476243586883783, "grad_norm": 0.1667267084121704, "learning_rate": 1.0370580228795597e-05, "loss": 0.4556, "step": 6618 }, { "epoch": 1.4764666517956726, "grad_norm": 0.17479191720485687, "learning_rate": 1.0368228405877799e-05, "loss": 0.4462, "step": 6619 }, { "epoch": 1.4766897167075619, "grad_norm": 0.1751541793346405, "learning_rate": 1.0365876562565195e-05, "loss": 0.4504, "step": 6620 }, { "epoch": 1.4769127816194514, "grad_norm": 0.20716704428195953, "learning_rate": 1.0363524698988045e-05, "loss": 0.4626, "step": 6621 }, { "epoch": 1.4771358465313407, "grad_norm": 0.16398191452026367, "learning_rate": 1.0361172815276615e-05, "loss": 0.4581, "step": 6622 }, { "epoch": 1.47735891144323, "grad_norm": 0.17340189218521118, "learning_rate": 1.0358820911561157e-05, "loss": 0.4472, "step": 6623 }, { "epoch": 1.4775819763551192, "grad_norm": 0.18412639200687408, "learning_rate": 1.0356468987971944e-05, "loss": 0.4323, "step": 6624 }, { "epoch": 1.4778050412670087, "grad_norm": 0.16925156116485596, "learning_rate": 1.0354117044639232e-05, "loss": 0.4324, "step": 6625 }, { "epoch": 1.478028106178898, "grad_norm": 0.1772768348455429, "learning_rate": 1.0351765081693294e-05, "loss": 0.4574, "step": 6626 }, { "epoch": 1.4782511710907875, "grad_norm": 0.1698581427335739, "learning_rate": 1.0349413099264391e-05, "loss": 0.4748, "step": 6627 }, { "epoch": 1.4784742360026768, "grad_norm": 0.1687650978565216, "learning_rate": 1.0347061097482794e-05, "loss": 0.4226, "step": 6628 }, { "epoch": 1.478697300914566, "grad_norm": 0.18025682866573334, "learning_rate": 1.034470907647877e-05, "loss": 0.4615, "step": 6629 }, { "epoch": 1.4789203658264554, "grad_norm": 0.18218332529067993, "learning_rate": 1.034235703638259e-05, "loss": 0.4393, "step": 6630 }, { "epoch": 1.4791434307383449, "grad_norm": 0.17049945890903473, "learning_rate": 1.0340004977324521e-05, "loss": 0.4613, "step": 6631 }, { "epoch": 1.4793664956502341, "grad_norm": 0.18157726526260376, "learning_rate": 1.033765289943484e-05, "loss": 0.4811, "step": 6632 }, { "epoch": 1.4795895605621237, "grad_norm": 0.17979174852371216, "learning_rate": 1.0335300802843815e-05, "loss": 0.4478, "step": 6633 }, { "epoch": 1.479812625474013, "grad_norm": 0.17775587737560272, "learning_rate": 1.0332948687681725e-05, "loss": 0.4669, "step": 6634 }, { "epoch": 1.4800356903859022, "grad_norm": 0.1723344624042511, "learning_rate": 1.0330596554078842e-05, "loss": 0.4577, "step": 6635 }, { "epoch": 1.4802587552977917, "grad_norm": 0.1759558767080307, "learning_rate": 1.0328244402165442e-05, "loss": 0.476, "step": 6636 }, { "epoch": 1.480481820209681, "grad_norm": 0.1862598955631256, "learning_rate": 1.0325892232071803e-05, "loss": 0.464, "step": 6637 }, { "epoch": 1.4807048851215705, "grad_norm": 0.18910667300224304, "learning_rate": 1.0323540043928199e-05, "loss": 0.4197, "step": 6638 }, { "epoch": 1.4809279500334598, "grad_norm": 0.16868862509727478, "learning_rate": 1.0321187837864917e-05, "loss": 0.4334, "step": 6639 }, { "epoch": 1.481151014945349, "grad_norm": 0.17516660690307617, "learning_rate": 1.0318835614012228e-05, "loss": 0.4503, "step": 6640 }, { "epoch": 1.4813740798572383, "grad_norm": 0.1748761534690857, "learning_rate": 1.0316483372500422e-05, "loss": 0.4656, "step": 6641 }, { "epoch": 1.4815971447691278, "grad_norm": 0.17153500020503998, "learning_rate": 1.0314131113459772e-05, "loss": 0.4646, "step": 6642 }, { "epoch": 1.4818202096810171, "grad_norm": 0.1690903753042221, "learning_rate": 1.0311778837020565e-05, "loss": 0.4384, "step": 6643 }, { "epoch": 1.4820432745929066, "grad_norm": 0.17170549929141998, "learning_rate": 1.0309426543313086e-05, "loss": 0.4606, "step": 6644 }, { "epoch": 1.482266339504796, "grad_norm": 0.20019586384296417, "learning_rate": 1.030707423246762e-05, "loss": 0.4458, "step": 6645 }, { "epoch": 1.4824894044166852, "grad_norm": 0.17187346518039703, "learning_rate": 1.0304721904614447e-05, "loss": 0.4777, "step": 6646 }, { "epoch": 1.4827124693285745, "grad_norm": 0.17592963576316833, "learning_rate": 1.0302369559883862e-05, "loss": 0.462, "step": 6647 }, { "epoch": 1.482935534240464, "grad_norm": 0.18362174928188324, "learning_rate": 1.0300017198406148e-05, "loss": 0.4851, "step": 6648 }, { "epoch": 1.4831585991523533, "grad_norm": 0.17138110101222992, "learning_rate": 1.0297664820311593e-05, "loss": 0.5046, "step": 6649 }, { "epoch": 1.4833816640642428, "grad_norm": 0.16969837248325348, "learning_rate": 1.029531242573049e-05, "loss": 0.4376, "step": 6650 }, { "epoch": 1.483604728976132, "grad_norm": 0.1832205057144165, "learning_rate": 1.0292960014793126e-05, "loss": 0.4376, "step": 6651 }, { "epoch": 1.4838277938880213, "grad_norm": 0.1777479201555252, "learning_rate": 1.0290607587629795e-05, "loss": 0.4616, "step": 6652 }, { "epoch": 1.4840508587999108, "grad_norm": 0.18245477974414825, "learning_rate": 1.0288255144370784e-05, "loss": 0.4699, "step": 6653 }, { "epoch": 1.4842739237118001, "grad_norm": 0.16267704963684082, "learning_rate": 1.0285902685146394e-05, "loss": 0.4358, "step": 6654 }, { "epoch": 1.4844969886236896, "grad_norm": 0.1723002940416336, "learning_rate": 1.0283550210086913e-05, "loss": 0.4621, "step": 6655 }, { "epoch": 1.484720053535579, "grad_norm": 0.17394044995307922, "learning_rate": 1.028119771932264e-05, "loss": 0.4456, "step": 6656 }, { "epoch": 1.4849431184474682, "grad_norm": 0.1774415820837021, "learning_rate": 1.0278845212983865e-05, "loss": 0.4761, "step": 6657 }, { "epoch": 1.4851661833593575, "grad_norm": 0.16867241263389587, "learning_rate": 1.0276492691200893e-05, "loss": 0.482, "step": 6658 }, { "epoch": 1.485389248271247, "grad_norm": 0.1685202419757843, "learning_rate": 1.027414015410401e-05, "loss": 0.45, "step": 6659 }, { "epoch": 1.4856123131831362, "grad_norm": 0.1738312542438507, "learning_rate": 1.0271787601823526e-05, "loss": 0.4591, "step": 6660 }, { "epoch": 1.4858353780950257, "grad_norm": 0.17828047275543213, "learning_rate": 1.0269435034489733e-05, "loss": 0.4261, "step": 6661 }, { "epoch": 1.486058443006915, "grad_norm": 0.164927139878273, "learning_rate": 1.0267082452232935e-05, "loss": 0.4383, "step": 6662 }, { "epoch": 1.4862815079188043, "grad_norm": 0.17446818947792053, "learning_rate": 1.0264729855183426e-05, "loss": 0.461, "step": 6663 }, { "epoch": 1.4865045728306936, "grad_norm": 0.1646289974451065, "learning_rate": 1.0262377243471517e-05, "loss": 0.4335, "step": 6664 }, { "epoch": 1.486727637742583, "grad_norm": 0.19351711869239807, "learning_rate": 1.0260024617227504e-05, "loss": 0.4697, "step": 6665 }, { "epoch": 1.4869507026544724, "grad_norm": 0.18421606719493866, "learning_rate": 1.0257671976581694e-05, "loss": 0.4531, "step": 6666 }, { "epoch": 1.4871737675663619, "grad_norm": 0.17079128324985504, "learning_rate": 1.0255319321664386e-05, "loss": 0.4531, "step": 6667 }, { "epoch": 1.4873968324782512, "grad_norm": 0.16973423957824707, "learning_rate": 1.0252966652605889e-05, "loss": 0.4358, "step": 6668 }, { "epoch": 1.4876198973901404, "grad_norm": 0.17064902186393738, "learning_rate": 1.0250613969536507e-05, "loss": 0.4843, "step": 6669 }, { "epoch": 1.48784296230203, "grad_norm": 0.1614328771829605, "learning_rate": 1.0248261272586549e-05, "loss": 0.4197, "step": 6670 }, { "epoch": 1.4880660272139192, "grad_norm": 0.1676885336637497, "learning_rate": 1.0245908561886319e-05, "loss": 0.464, "step": 6671 }, { "epoch": 1.4882890921258087, "grad_norm": 0.17245353758335114, "learning_rate": 1.0243555837566124e-05, "loss": 0.4368, "step": 6672 }, { "epoch": 1.488512157037698, "grad_norm": 0.184434175491333, "learning_rate": 1.0241203099756279e-05, "loss": 0.4419, "step": 6673 }, { "epoch": 1.4887352219495873, "grad_norm": 0.17373715341091156, "learning_rate": 1.0238850348587088e-05, "loss": 0.471, "step": 6674 }, { "epoch": 1.4889582868614766, "grad_norm": 0.1708296537399292, "learning_rate": 1.0236497584188862e-05, "loss": 0.4468, "step": 6675 }, { "epoch": 1.489181351773366, "grad_norm": 0.18406598269939423, "learning_rate": 1.023414480669191e-05, "loss": 0.4622, "step": 6676 }, { "epoch": 1.4894044166852554, "grad_norm": 0.17563220858573914, "learning_rate": 1.0231792016226546e-05, "loss": 0.4529, "step": 6677 }, { "epoch": 1.4896274815971449, "grad_norm": 0.21065136790275574, "learning_rate": 1.0229439212923084e-05, "loss": 0.4668, "step": 6678 }, { "epoch": 1.4898505465090341, "grad_norm": 0.18098492920398712, "learning_rate": 1.0227086396911837e-05, "loss": 0.4669, "step": 6679 }, { "epoch": 1.4900736114209234, "grad_norm": 0.18423466384410858, "learning_rate": 1.0224733568323111e-05, "loss": 0.4815, "step": 6680 }, { "epoch": 1.490296676332813, "grad_norm": 0.17648786306381226, "learning_rate": 1.022238072728723e-05, "loss": 0.4566, "step": 6681 }, { "epoch": 1.4905197412447022, "grad_norm": 0.17524605989456177, "learning_rate": 1.0220027873934506e-05, "loss": 0.4466, "step": 6682 }, { "epoch": 1.4907428061565915, "grad_norm": 0.18514925241470337, "learning_rate": 1.021767500839525e-05, "loss": 0.4545, "step": 6683 }, { "epoch": 1.490965871068481, "grad_norm": 0.17596104741096497, "learning_rate": 1.021532213079979e-05, "loss": 0.4637, "step": 6684 }, { "epoch": 1.4911889359803703, "grad_norm": 0.1722065955400467, "learning_rate": 1.021296924127843e-05, "loss": 0.4471, "step": 6685 }, { "epoch": 1.4914120008922596, "grad_norm": 0.1769905835390091, "learning_rate": 1.0210616339961497e-05, "loss": 0.4665, "step": 6686 }, { "epoch": 1.491635065804149, "grad_norm": 0.1704041212797165, "learning_rate": 1.0208263426979304e-05, "loss": 0.437, "step": 6687 }, { "epoch": 1.4918581307160383, "grad_norm": 0.1706121265888214, "learning_rate": 1.0205910502462174e-05, "loss": 0.4533, "step": 6688 }, { "epoch": 1.4920811956279278, "grad_norm": 0.1680435687303543, "learning_rate": 1.0203557566540425e-05, "loss": 0.4465, "step": 6689 }, { "epoch": 1.4923042605398171, "grad_norm": 0.17137907445430756, "learning_rate": 1.0201204619344378e-05, "loss": 0.4712, "step": 6690 }, { "epoch": 1.4925273254517064, "grad_norm": 0.17346073687076569, "learning_rate": 1.0198851661004352e-05, "loss": 0.4396, "step": 6691 }, { "epoch": 1.4927503903635957, "grad_norm": 0.1783093512058258, "learning_rate": 1.0196498691650671e-05, "loss": 0.453, "step": 6692 }, { "epoch": 1.4929734552754852, "grad_norm": 0.16575607657432556, "learning_rate": 1.0194145711413656e-05, "loss": 0.4234, "step": 6693 }, { "epoch": 1.4931965201873745, "grad_norm": 0.17617058753967285, "learning_rate": 1.0191792720423632e-05, "loss": 0.4275, "step": 6694 }, { "epoch": 1.493419585099264, "grad_norm": 0.17400598526000977, "learning_rate": 1.0189439718810919e-05, "loss": 0.4667, "step": 6695 }, { "epoch": 1.4936426500111533, "grad_norm": 0.17919796705245972, "learning_rate": 1.0187086706705844e-05, "loss": 0.4669, "step": 6696 }, { "epoch": 1.4938657149230425, "grad_norm": 0.17682035267353058, "learning_rate": 1.0184733684238728e-05, "loss": 0.4918, "step": 6697 }, { "epoch": 1.494088779834932, "grad_norm": 0.16982607543468475, "learning_rate": 1.01823806515399e-05, "loss": 0.4626, "step": 6698 }, { "epoch": 1.4943118447468213, "grad_norm": 0.17292605340480804, "learning_rate": 1.0180027608739684e-05, "loss": 0.4648, "step": 6699 }, { "epoch": 1.4945349096587106, "grad_norm": 0.16893380880355835, "learning_rate": 1.017767455596841e-05, "loss": 0.4285, "step": 6700 }, { "epoch": 1.4947579745706001, "grad_norm": 0.17039963603019714, "learning_rate": 1.0175321493356396e-05, "loss": 0.4589, "step": 6701 }, { "epoch": 1.4949810394824894, "grad_norm": 0.1750868260860443, "learning_rate": 1.0172968421033977e-05, "loss": 0.4703, "step": 6702 }, { "epoch": 1.4952041043943787, "grad_norm": 0.17685000598430634, "learning_rate": 1.017061533913148e-05, "loss": 0.4585, "step": 6703 }, { "epoch": 1.4954271693062682, "grad_norm": 0.1752578467130661, "learning_rate": 1.0168262247779231e-05, "loss": 0.4583, "step": 6704 }, { "epoch": 1.4956502342181575, "grad_norm": 0.16632848978042603, "learning_rate": 1.0165909147107563e-05, "loss": 0.4684, "step": 6705 }, { "epoch": 1.495873299130047, "grad_norm": 0.17605751752853394, "learning_rate": 1.0163556037246798e-05, "loss": 0.4747, "step": 6706 }, { "epoch": 1.4960963640419362, "grad_norm": 0.16805213689804077, "learning_rate": 1.0161202918327276e-05, "loss": 0.4272, "step": 6707 }, { "epoch": 1.4963194289538255, "grad_norm": 0.25550636649131775, "learning_rate": 1.0158849790479318e-05, "loss": 0.4756, "step": 6708 }, { "epoch": 1.4965424938657148, "grad_norm": 0.1749117523431778, "learning_rate": 1.015649665383326e-05, "loss": 0.4746, "step": 6709 }, { "epoch": 1.4967655587776043, "grad_norm": 0.17432990670204163, "learning_rate": 1.0154143508519434e-05, "loss": 0.4707, "step": 6710 }, { "epoch": 1.4969886236894936, "grad_norm": 0.16568134725093842, "learning_rate": 1.0151790354668171e-05, "loss": 0.4359, "step": 6711 }, { "epoch": 1.497211688601383, "grad_norm": 0.17027510702610016, "learning_rate": 1.0149437192409803e-05, "loss": 0.4352, "step": 6712 }, { "epoch": 1.4974347535132724, "grad_norm": 0.17918628454208374, "learning_rate": 1.0147084021874664e-05, "loss": 0.4749, "step": 6713 }, { "epoch": 1.4976578184251617, "grad_norm": 0.1729065179824829, "learning_rate": 1.0144730843193086e-05, "loss": 0.4465, "step": 6714 }, { "epoch": 1.4978808833370512, "grad_norm": 0.17687968909740448, "learning_rate": 1.0142377656495405e-05, "loss": 0.4861, "step": 6715 }, { "epoch": 1.4981039482489404, "grad_norm": 0.17104387283325195, "learning_rate": 1.0140024461911955e-05, "loss": 0.4529, "step": 6716 }, { "epoch": 1.4983270131608297, "grad_norm": 0.1739518642425537, "learning_rate": 1.0137671259573066e-05, "loss": 0.4493, "step": 6717 }, { "epoch": 1.4985500780727192, "grad_norm": 0.17664003372192383, "learning_rate": 1.013531804960908e-05, "loss": 0.4567, "step": 6718 }, { "epoch": 1.4987731429846085, "grad_norm": 0.16971804201602936, "learning_rate": 1.0132964832150325e-05, "loss": 0.4546, "step": 6719 }, { "epoch": 1.4989962078964978, "grad_norm": 0.17096221446990967, "learning_rate": 1.0130611607327144e-05, "loss": 0.4399, "step": 6720 }, { "epoch": 1.4992192728083873, "grad_norm": 0.16578656435012817, "learning_rate": 1.0128258375269868e-05, "loss": 0.4558, "step": 6721 }, { "epoch": 1.4994423377202766, "grad_norm": 0.17407840490341187, "learning_rate": 1.012590513610884e-05, "loss": 0.4345, "step": 6722 }, { "epoch": 1.499665402632166, "grad_norm": 0.16804394125938416, "learning_rate": 1.012355188997439e-05, "loss": 0.4497, "step": 6723 }, { "epoch": 1.4998884675440554, "grad_norm": 0.1964014172554016, "learning_rate": 1.0121198636996862e-05, "loss": 0.4412, "step": 6724 }, { "epoch": 1.5001115324559446, "grad_norm": 0.17465969920158386, "learning_rate": 1.011884537730659e-05, "loss": 0.4605, "step": 6725 }, { "epoch": 1.500334597367834, "grad_norm": 0.17849335074424744, "learning_rate": 1.0116492111033916e-05, "loss": 0.4766, "step": 6726 }, { "epoch": 1.5005576622797234, "grad_norm": 0.1719314008951187, "learning_rate": 1.0114138838309171e-05, "loss": 0.4745, "step": 6727 }, { "epoch": 1.5007807271916127, "grad_norm": 0.17707185447216034, "learning_rate": 1.0111785559262703e-05, "loss": 0.4544, "step": 6728 }, { "epoch": 1.5010037921035022, "grad_norm": 0.18315830826759338, "learning_rate": 1.0109432274024846e-05, "loss": 0.4607, "step": 6729 }, { "epoch": 1.5012268570153915, "grad_norm": 0.18333736062049866, "learning_rate": 1.0107078982725942e-05, "loss": 0.4835, "step": 6730 }, { "epoch": 1.5014499219272808, "grad_norm": 0.1704811453819275, "learning_rate": 1.010472568549633e-05, "loss": 0.4337, "step": 6731 }, { "epoch": 1.50167298683917, "grad_norm": 0.1818595677614212, "learning_rate": 1.0102372382466352e-05, "loss": 0.46, "step": 6732 }, { "epoch": 1.5018960517510596, "grad_norm": 0.16688323020935059, "learning_rate": 1.0100019073766344e-05, "loss": 0.4584, "step": 6733 }, { "epoch": 1.502119116662949, "grad_norm": 0.17898161709308624, "learning_rate": 1.0097665759526654e-05, "loss": 0.4605, "step": 6734 }, { "epoch": 1.5023421815748383, "grad_norm": 0.16883964836597443, "learning_rate": 1.009531243987762e-05, "loss": 0.4334, "step": 6735 }, { "epoch": 1.5025652464867276, "grad_norm": 0.16763442754745483, "learning_rate": 1.009295911494958e-05, "loss": 0.4353, "step": 6736 }, { "epoch": 1.502788311398617, "grad_norm": 0.1733693778514862, "learning_rate": 1.0090605784872884e-05, "loss": 0.4385, "step": 6737 }, { "epoch": 1.5030113763105064, "grad_norm": 0.17827312648296356, "learning_rate": 1.0088252449777866e-05, "loss": 0.4436, "step": 6738 }, { "epoch": 1.5032344412223957, "grad_norm": 0.17768245935440063, "learning_rate": 1.0085899109794874e-05, "loss": 0.4745, "step": 6739 }, { "epoch": 1.5034575061342852, "grad_norm": 0.1763843595981598, "learning_rate": 1.0083545765054248e-05, "loss": 0.4604, "step": 6740 }, { "epoch": 1.5036805710461745, "grad_norm": 0.17313605546951294, "learning_rate": 1.0081192415686334e-05, "loss": 0.4513, "step": 6741 }, { "epoch": 1.5039036359580638, "grad_norm": 0.17623473703861237, "learning_rate": 1.0078839061821473e-05, "loss": 0.4541, "step": 6742 }, { "epoch": 1.504126700869953, "grad_norm": 0.17384295165538788, "learning_rate": 1.007648570359001e-05, "loss": 0.454, "step": 6743 }, { "epoch": 1.5043497657818425, "grad_norm": 0.1829373836517334, "learning_rate": 1.0074132341122282e-05, "loss": 0.4691, "step": 6744 }, { "epoch": 1.5045728306937318, "grad_norm": 0.17881907522678375, "learning_rate": 1.0071778974548642e-05, "loss": 0.4705, "step": 6745 }, { "epoch": 1.5047958956056213, "grad_norm": 0.17440833151340485, "learning_rate": 1.0069425603999432e-05, "loss": 0.45, "step": 6746 }, { "epoch": 1.5050189605175106, "grad_norm": 0.17206740379333496, "learning_rate": 1.0067072229604995e-05, "loss": 0.4508, "step": 6747 }, { "epoch": 1.5052420254293999, "grad_norm": 0.18030904233455658, "learning_rate": 1.0064718851495674e-05, "loss": 0.4786, "step": 6748 }, { "epoch": 1.5054650903412892, "grad_norm": 0.17595484852790833, "learning_rate": 1.0062365469801818e-05, "loss": 0.4772, "step": 6749 }, { "epoch": 1.5056881552531787, "grad_norm": 0.1742616444826126, "learning_rate": 1.006001208465377e-05, "loss": 0.4564, "step": 6750 }, { "epoch": 1.5059112201650682, "grad_norm": 0.16961626708507538, "learning_rate": 1.0057658696181875e-05, "loss": 0.4514, "step": 6751 }, { "epoch": 1.5061342850769575, "grad_norm": 0.16909094154834747, "learning_rate": 1.0055305304516477e-05, "loss": 0.4647, "step": 6752 }, { "epoch": 1.5063573499888467, "grad_norm": 0.168659508228302, "learning_rate": 1.0052951909787927e-05, "loss": 0.4883, "step": 6753 }, { "epoch": 1.506580414900736, "grad_norm": 0.16884328424930573, "learning_rate": 1.0050598512126562e-05, "loss": 0.4683, "step": 6754 }, { "epoch": 1.5068034798126255, "grad_norm": 0.17461951076984406, "learning_rate": 1.0048245111662735e-05, "loss": 0.455, "step": 6755 }, { "epoch": 1.5070265447245148, "grad_norm": 0.1754961460828781, "learning_rate": 1.0045891708526796e-05, "loss": 0.4833, "step": 6756 }, { "epoch": 1.5072496096364043, "grad_norm": 0.16815195977687836, "learning_rate": 1.0043538302849078e-05, "loss": 0.4338, "step": 6757 }, { "epoch": 1.5074726745482936, "grad_norm": 0.1777064949274063, "learning_rate": 1.004118489475994e-05, "loss": 0.4582, "step": 6758 }, { "epoch": 1.5076957394601829, "grad_norm": 0.17062366008758545, "learning_rate": 1.0038831484389719e-05, "loss": 0.4427, "step": 6759 }, { "epoch": 1.5079188043720722, "grad_norm": 0.16902603209018707, "learning_rate": 1.003647807186877e-05, "loss": 0.464, "step": 6760 }, { "epoch": 1.5081418692839617, "grad_norm": 0.4289887547492981, "learning_rate": 1.0034124657327433e-05, "loss": 0.4714, "step": 6761 }, { "epoch": 1.5083649341958512, "grad_norm": 0.17374208569526672, "learning_rate": 1.003177124089606e-05, "loss": 0.4568, "step": 6762 }, { "epoch": 1.5085879991077404, "grad_norm": 0.2121153473854065, "learning_rate": 1.0029417822704995e-05, "loss": 0.4444, "step": 6763 }, { "epoch": 1.5088110640196297, "grad_norm": 0.17344814538955688, "learning_rate": 1.0027064402884585e-05, "loss": 0.4663, "step": 6764 }, { "epoch": 1.509034128931519, "grad_norm": 0.18848566710948944, "learning_rate": 1.0024710981565177e-05, "loss": 0.4632, "step": 6765 }, { "epoch": 1.5092571938434083, "grad_norm": 0.16708870232105255, "learning_rate": 1.002235755887712e-05, "loss": 0.4484, "step": 6766 }, { "epoch": 1.5094802587552978, "grad_norm": 0.17515549063682556, "learning_rate": 1.0020004134950765e-05, "loss": 0.4862, "step": 6767 }, { "epoch": 1.5097033236671873, "grad_norm": 0.17192357778549194, "learning_rate": 1.001765070991645e-05, "loss": 0.4601, "step": 6768 }, { "epoch": 1.5099263885790766, "grad_norm": 0.1708642542362213, "learning_rate": 1.001529728390453e-05, "loss": 0.4686, "step": 6769 }, { "epoch": 1.5101494534909659, "grad_norm": 0.17151105403900146, "learning_rate": 1.0012943857045349e-05, "loss": 0.459, "step": 6770 }, { "epoch": 1.5103725184028551, "grad_norm": 0.173813134431839, "learning_rate": 1.001059042946926e-05, "loss": 0.4437, "step": 6771 }, { "epoch": 1.5105955833147446, "grad_norm": 0.17130842804908752, "learning_rate": 1.0008237001306602e-05, "loss": 0.457, "step": 6772 }, { "epoch": 1.510818648226634, "grad_norm": 0.17134280502796173, "learning_rate": 1.000588357268773e-05, "loss": 0.4546, "step": 6773 }, { "epoch": 1.5110417131385234, "grad_norm": 0.17797818779945374, "learning_rate": 1.0003530143742986e-05, "loss": 0.4637, "step": 6774 }, { "epoch": 1.5112647780504127, "grad_norm": 0.16602249443531036, "learning_rate": 1.000117671460272e-05, "loss": 0.4494, "step": 6775 }, { "epoch": 1.511487842962302, "grad_norm": 0.16833782196044922, "learning_rate": 9.998823285397282e-06, "loss": 0.4414, "step": 6776 }, { "epoch": 1.5117109078741913, "grad_norm": 0.1781986951828003, "learning_rate": 9.996469856257018e-06, "loss": 0.444, "step": 6777 }, { "epoch": 1.5119339727860808, "grad_norm": 0.16520027816295624, "learning_rate": 9.994116427312274e-06, "loss": 0.4289, "step": 6778 }, { "epoch": 1.5121570376979703, "grad_norm": 0.17205528914928436, "learning_rate": 9.991762998693401e-06, "loss": 0.4745, "step": 6779 }, { "epoch": 1.5123801026098596, "grad_norm": 0.17844898998737335, "learning_rate": 9.989409570530743e-06, "loss": 0.4685, "step": 6780 }, { "epoch": 1.5126031675217488, "grad_norm": 0.1733841598033905, "learning_rate": 9.987056142954653e-06, "loss": 0.4505, "step": 6781 }, { "epoch": 1.5128262324336381, "grad_norm": 0.16828158497810364, "learning_rate": 9.984702716095472e-06, "loss": 0.4303, "step": 6782 }, { "epoch": 1.5130492973455274, "grad_norm": 0.18432192504405975, "learning_rate": 9.982349290083553e-06, "loss": 0.4657, "step": 6783 }, { "epoch": 1.513272362257417, "grad_norm": 0.17612218856811523, "learning_rate": 9.979995865049239e-06, "loss": 0.4531, "step": 6784 }, { "epoch": 1.5134954271693064, "grad_norm": 0.1782546043395996, "learning_rate": 9.977642441122881e-06, "loss": 0.4442, "step": 6785 }, { "epoch": 1.5137184920811957, "grad_norm": 0.17371070384979248, "learning_rate": 9.975289018434826e-06, "loss": 0.4636, "step": 6786 }, { "epoch": 1.513941556993085, "grad_norm": 0.16479544341564178, "learning_rate": 9.972935597115417e-06, "loss": 0.4454, "step": 6787 }, { "epoch": 1.5141646219049743, "grad_norm": 0.18058109283447266, "learning_rate": 9.970582177295009e-06, "loss": 0.4914, "step": 6788 }, { "epoch": 1.5143876868168638, "grad_norm": 0.17538684606552124, "learning_rate": 9.968228759103943e-06, "loss": 0.4258, "step": 6789 }, { "epoch": 1.514610751728753, "grad_norm": 0.17010897397994995, "learning_rate": 9.965875342672572e-06, "loss": 0.4544, "step": 6790 }, { "epoch": 1.5148338166406425, "grad_norm": 0.1833350658416748, "learning_rate": 9.963521928131234e-06, "loss": 0.4608, "step": 6791 }, { "epoch": 1.5150568815525318, "grad_norm": 0.1769610494375229, "learning_rate": 9.961168515610283e-06, "loss": 0.4679, "step": 6792 }, { "epoch": 1.515279946464421, "grad_norm": 0.17435282468795776, "learning_rate": 9.958815105240064e-06, "loss": 0.4737, "step": 6793 }, { "epoch": 1.5155030113763104, "grad_norm": 0.1680775284767151, "learning_rate": 9.956461697150925e-06, "loss": 0.4635, "step": 6794 }, { "epoch": 1.5157260762881999, "grad_norm": 0.17000484466552734, "learning_rate": 9.954108291473207e-06, "loss": 0.4558, "step": 6795 }, { "epoch": 1.5159491412000894, "grad_norm": 0.17273864150047302, "learning_rate": 9.951754888337265e-06, "loss": 0.4472, "step": 6796 }, { "epoch": 1.5161722061119787, "grad_norm": 0.18519042432308197, "learning_rate": 9.949401487873438e-06, "loss": 0.455, "step": 6797 }, { "epoch": 1.516395271023868, "grad_norm": 0.17533376812934875, "learning_rate": 9.947048090212076e-06, "loss": 0.4684, "step": 6798 }, { "epoch": 1.5166183359357572, "grad_norm": 0.1752437800168991, "learning_rate": 9.944694695483523e-06, "loss": 0.4625, "step": 6799 }, { "epoch": 1.5168414008476465, "grad_norm": 0.17211464047431946, "learning_rate": 9.942341303818128e-06, "loss": 0.443, "step": 6800 }, { "epoch": 1.517064465759536, "grad_norm": 0.170721635222435, "learning_rate": 9.939987915346232e-06, "loss": 0.4211, "step": 6801 }, { "epoch": 1.5172875306714255, "grad_norm": 0.17440038919448853, "learning_rate": 9.937634530198184e-06, "loss": 0.456, "step": 6802 }, { "epoch": 1.5175105955833148, "grad_norm": 0.17179426550865173, "learning_rate": 9.935281148504326e-06, "loss": 0.4487, "step": 6803 }, { "epoch": 1.517733660495204, "grad_norm": 0.17573867738246918, "learning_rate": 9.932927770395007e-06, "loss": 0.4663, "step": 6804 }, { "epoch": 1.5179567254070934, "grad_norm": 0.18278174102306366, "learning_rate": 9.93057439600057e-06, "loss": 0.4471, "step": 6805 }, { "epoch": 1.5181797903189829, "grad_norm": 0.17256884276866913, "learning_rate": 9.92822102545136e-06, "loss": 0.4675, "step": 6806 }, { "epoch": 1.5184028552308722, "grad_norm": 0.19855371117591858, "learning_rate": 9.925867658877718e-06, "loss": 0.4661, "step": 6807 }, { "epoch": 1.5186259201427617, "grad_norm": 0.1643659472465515, "learning_rate": 9.923514296409992e-06, "loss": 0.4457, "step": 6808 }, { "epoch": 1.518848985054651, "grad_norm": 0.16748277842998505, "learning_rate": 9.921160938178529e-06, "loss": 0.4558, "step": 6809 }, { "epoch": 1.5190720499665402, "grad_norm": 0.16805437207221985, "learning_rate": 9.918807584313666e-06, "loss": 0.4283, "step": 6810 }, { "epoch": 1.5192951148784295, "grad_norm": 0.18906736373901367, "learning_rate": 9.916454234945752e-06, "loss": 0.4675, "step": 6811 }, { "epoch": 1.519518179790319, "grad_norm": 0.19310304522514343, "learning_rate": 9.914100890205124e-06, "loss": 0.4689, "step": 6812 }, { "epoch": 1.5197412447022085, "grad_norm": 0.17019645869731903, "learning_rate": 9.911747550222134e-06, "loss": 0.4309, "step": 6813 }, { "epoch": 1.5199643096140978, "grad_norm": 0.16500838100910187, "learning_rate": 9.909394215127118e-06, "loss": 0.4364, "step": 6814 }, { "epoch": 1.520187374525987, "grad_norm": 0.1711840033531189, "learning_rate": 9.90704088505042e-06, "loss": 0.4439, "step": 6815 }, { "epoch": 1.5204104394378763, "grad_norm": 0.17483307421207428, "learning_rate": 9.904687560122381e-06, "loss": 0.493, "step": 6816 }, { "epoch": 1.5206335043497656, "grad_norm": 0.17699526250362396, "learning_rate": 9.90233424047335e-06, "loss": 0.4446, "step": 6817 }, { "epoch": 1.5208565692616551, "grad_norm": 0.18029628694057465, "learning_rate": 9.89998092623366e-06, "loss": 0.4676, "step": 6818 }, { "epoch": 1.5210796341735446, "grad_norm": 0.17293018102645874, "learning_rate": 9.897627617533653e-06, "loss": 0.4501, "step": 6819 }, { "epoch": 1.521302699085434, "grad_norm": 0.17556585371494293, "learning_rate": 9.895274314503674e-06, "loss": 0.4454, "step": 6820 }, { "epoch": 1.5215257639973232, "grad_norm": 0.1644999384880066, "learning_rate": 9.892921017274062e-06, "loss": 0.4197, "step": 6821 }, { "epoch": 1.5217488289092125, "grad_norm": 0.17591945827007294, "learning_rate": 9.890567725975159e-06, "loss": 0.4593, "step": 6822 }, { "epoch": 1.521971893821102, "grad_norm": 0.1673215925693512, "learning_rate": 9.888214440737302e-06, "loss": 0.4442, "step": 6823 }, { "epoch": 1.5221949587329913, "grad_norm": 0.16372933983802795, "learning_rate": 9.885861161690832e-06, "loss": 0.4252, "step": 6824 }, { "epoch": 1.5224180236448808, "grad_norm": 0.17661777138710022, "learning_rate": 9.88350788896609e-06, "loss": 0.4593, "step": 6825 }, { "epoch": 1.52264108855677, "grad_norm": 0.17217707633972168, "learning_rate": 9.881154622693415e-06, "loss": 0.4418, "step": 6826 }, { "epoch": 1.5228641534686593, "grad_norm": 0.1641775518655777, "learning_rate": 9.878801363003143e-06, "loss": 0.4624, "step": 6827 }, { "epoch": 1.5230872183805486, "grad_norm": 0.17330925166606903, "learning_rate": 9.876448110025615e-06, "loss": 0.4916, "step": 6828 }, { "epoch": 1.5233102832924381, "grad_norm": 0.17335113883018494, "learning_rate": 9.874094863891166e-06, "loss": 0.4581, "step": 6829 }, { "epoch": 1.5235333482043276, "grad_norm": 0.18039341270923615, "learning_rate": 9.871741624730134e-06, "loss": 0.4441, "step": 6830 }, { "epoch": 1.523756413116217, "grad_norm": 0.18012328445911407, "learning_rate": 9.869388392672861e-06, "loss": 0.4618, "step": 6831 }, { "epoch": 1.5239794780281062, "grad_norm": 0.17947714030742645, "learning_rate": 9.867035167849678e-06, "loss": 0.4905, "step": 6832 }, { "epoch": 1.5242025429399955, "grad_norm": 0.18116816878318787, "learning_rate": 9.864681950390927e-06, "loss": 0.4649, "step": 6833 }, { "epoch": 1.5244256078518847, "grad_norm": 0.1693393439054489, "learning_rate": 9.862328740426938e-06, "loss": 0.458, "step": 6834 }, { "epoch": 1.5246486727637742, "grad_norm": 0.17401763796806335, "learning_rate": 9.85997553808805e-06, "loss": 0.4719, "step": 6835 }, { "epoch": 1.5248717376756638, "grad_norm": 0.17534461617469788, "learning_rate": 9.857622343504597e-06, "loss": 0.4504, "step": 6836 }, { "epoch": 1.525094802587553, "grad_norm": 0.17880570888519287, "learning_rate": 9.855269156806916e-06, "loss": 0.4832, "step": 6837 }, { "epoch": 1.5253178674994423, "grad_norm": 0.16782738268375397, "learning_rate": 9.852915978125337e-06, "loss": 0.46, "step": 6838 }, { "epoch": 1.5255409324113316, "grad_norm": 0.18301822245121002, "learning_rate": 9.850562807590199e-06, "loss": 0.4459, "step": 6839 }, { "epoch": 1.525763997323221, "grad_norm": 0.1692187637090683, "learning_rate": 9.84820964533183e-06, "loss": 0.4465, "step": 6840 }, { "epoch": 1.5259870622351104, "grad_norm": 0.17732323706150055, "learning_rate": 9.845856491480569e-06, "loss": 0.4374, "step": 6841 }, { "epoch": 1.5262101271469999, "grad_norm": 0.17886339128017426, "learning_rate": 9.843503346166741e-06, "loss": 0.445, "step": 6842 }, { "epoch": 1.5264331920588892, "grad_norm": 0.17064787447452545, "learning_rate": 9.841150209520686e-06, "loss": 0.4343, "step": 6843 }, { "epoch": 1.5266562569707784, "grad_norm": 0.18215104937553406, "learning_rate": 9.83879708167273e-06, "loss": 0.4515, "step": 6844 }, { "epoch": 1.5268793218826677, "grad_norm": 0.17810726165771484, "learning_rate": 9.836443962753205e-06, "loss": 0.4746, "step": 6845 }, { "epoch": 1.5271023867945572, "grad_norm": 0.17213033139705658, "learning_rate": 9.834090852892442e-06, "loss": 0.4731, "step": 6846 }, { "epoch": 1.5273254517064467, "grad_norm": 0.18000519275665283, "learning_rate": 9.831737752220772e-06, "loss": 0.4725, "step": 6847 }, { "epoch": 1.527548516618336, "grad_norm": 0.17720220983028412, "learning_rate": 9.829384660868523e-06, "loss": 0.449, "step": 6848 }, { "epoch": 1.5277715815302253, "grad_norm": 0.1757756471633911, "learning_rate": 9.827031578966026e-06, "loss": 0.4492, "step": 6849 }, { "epoch": 1.5279946464421146, "grad_norm": 0.18418079614639282, "learning_rate": 9.824678506643607e-06, "loss": 0.4514, "step": 6850 }, { "epoch": 1.5282177113540039, "grad_norm": 0.16821002960205078, "learning_rate": 9.822325444031594e-06, "loss": 0.4331, "step": 6851 }, { "epoch": 1.5284407762658934, "grad_norm": 0.16792722046375275, "learning_rate": 9.819972391260318e-06, "loss": 0.4373, "step": 6852 }, { "epoch": 1.5286638411777829, "grad_norm": 0.18326304852962494, "learning_rate": 9.8176193484601e-06, "loss": 0.4577, "step": 6853 }, { "epoch": 1.5288869060896721, "grad_norm": 0.1714770793914795, "learning_rate": 9.815266315761275e-06, "loss": 0.437, "step": 6854 }, { "epoch": 1.5291099710015614, "grad_norm": 0.16261571645736694, "learning_rate": 9.81291329329416e-06, "loss": 0.406, "step": 6855 }, { "epoch": 1.5293330359134507, "grad_norm": 0.17615249752998352, "learning_rate": 9.810560281189085e-06, "loss": 0.4704, "step": 6856 }, { "epoch": 1.5295561008253402, "grad_norm": 0.1748465746641159, "learning_rate": 9.80820727957637e-06, "loss": 0.4763, "step": 6857 }, { "epoch": 1.5297791657372295, "grad_norm": 0.1718483865261078, "learning_rate": 9.805854288586346e-06, "loss": 0.4574, "step": 6858 }, { "epoch": 1.530002230649119, "grad_norm": 0.17926479876041412, "learning_rate": 9.803501308349332e-06, "loss": 0.4556, "step": 6859 }, { "epoch": 1.5302252955610083, "grad_norm": 0.17003634572029114, "learning_rate": 9.801148338995652e-06, "loss": 0.4553, "step": 6860 }, { "epoch": 1.5304483604728976, "grad_norm": 0.17237353324890137, "learning_rate": 9.798795380655625e-06, "loss": 0.4521, "step": 6861 }, { "epoch": 1.5306714253847868, "grad_norm": 0.18084143102169037, "learning_rate": 9.796442433459578e-06, "loss": 0.4592, "step": 6862 }, { "epoch": 1.5308944902966763, "grad_norm": 0.18167221546173096, "learning_rate": 9.794089497537827e-06, "loss": 0.4735, "step": 6863 }, { "epoch": 1.5311175552085659, "grad_norm": 0.18819572031497955, "learning_rate": 9.791736573020697e-06, "loss": 0.4906, "step": 6864 }, { "epoch": 1.5313406201204551, "grad_norm": 0.17070408165454865, "learning_rate": 9.789383660038506e-06, "loss": 0.4242, "step": 6865 }, { "epoch": 1.5315636850323444, "grad_norm": 0.17565836012363434, "learning_rate": 9.787030758721573e-06, "loss": 0.4431, "step": 6866 }, { "epoch": 1.5317867499442337, "grad_norm": 0.174460306763649, "learning_rate": 9.784677869200215e-06, "loss": 0.4487, "step": 6867 }, { "epoch": 1.532009814856123, "grad_norm": 0.17642372846603394, "learning_rate": 9.782324991604751e-06, "loss": 0.469, "step": 6868 }, { "epoch": 1.5322328797680125, "grad_norm": 0.17292480170726776, "learning_rate": 9.779972126065497e-06, "loss": 0.4408, "step": 6869 }, { "epoch": 1.532455944679902, "grad_norm": 0.17120680212974548, "learning_rate": 9.777619272712774e-06, "loss": 0.474, "step": 6870 }, { "epoch": 1.5326790095917913, "grad_norm": 0.16610749065876007, "learning_rate": 9.77526643167689e-06, "loss": 0.4486, "step": 6871 }, { "epoch": 1.5329020745036805, "grad_norm": 0.17615914344787598, "learning_rate": 9.772913603088166e-06, "loss": 0.4654, "step": 6872 }, { "epoch": 1.5331251394155698, "grad_norm": 0.1731085181236267, "learning_rate": 9.770560787076918e-06, "loss": 0.4459, "step": 6873 }, { "epoch": 1.5333482043274593, "grad_norm": 0.17174161970615387, "learning_rate": 9.768207983773455e-06, "loss": 0.4443, "step": 6874 }, { "epoch": 1.5335712692393486, "grad_norm": 0.17707736790180206, "learning_rate": 9.765855193308092e-06, "loss": 0.4707, "step": 6875 }, { "epoch": 1.5337943341512381, "grad_norm": 0.17120012640953064, "learning_rate": 9.763502415811141e-06, "loss": 0.4599, "step": 6876 }, { "epoch": 1.5340173990631274, "grad_norm": 0.18111848831176758, "learning_rate": 9.761149651412915e-06, "loss": 0.4354, "step": 6877 }, { "epoch": 1.5342404639750167, "grad_norm": 0.17889343202114105, "learning_rate": 9.758796900243723e-06, "loss": 0.4521, "step": 6878 }, { "epoch": 1.534463528886906, "grad_norm": 0.1871626377105713, "learning_rate": 9.756444162433876e-06, "loss": 0.4711, "step": 6879 }, { "epoch": 1.5346865937987955, "grad_norm": 0.17596866190433502, "learning_rate": 9.754091438113683e-06, "loss": 0.4566, "step": 6880 }, { "epoch": 1.534909658710685, "grad_norm": 0.17852075397968292, "learning_rate": 9.751738727413453e-06, "loss": 0.4763, "step": 6881 }, { "epoch": 1.5351327236225742, "grad_norm": 0.191118523478508, "learning_rate": 9.749386030463493e-06, "loss": 0.4396, "step": 6882 }, { "epoch": 1.5353557885344635, "grad_norm": 0.19689539074897766, "learning_rate": 9.747033347394113e-06, "loss": 0.4421, "step": 6883 }, { "epoch": 1.5355788534463528, "grad_norm": 0.16877755522727966, "learning_rate": 9.744680678335614e-06, "loss": 0.4185, "step": 6884 }, { "epoch": 1.535801918358242, "grad_norm": 0.1971082091331482, "learning_rate": 9.74232802341831e-06, "loss": 0.4537, "step": 6885 }, { "epoch": 1.5360249832701316, "grad_norm": 0.1687203049659729, "learning_rate": 9.739975382772498e-06, "loss": 0.4524, "step": 6886 }, { "epoch": 1.536248048182021, "grad_norm": 0.169671893119812, "learning_rate": 9.737622756528485e-06, "loss": 0.4467, "step": 6887 }, { "epoch": 1.5364711130939104, "grad_norm": 0.1722356528043747, "learning_rate": 9.735270144816574e-06, "loss": 0.4771, "step": 6888 }, { "epoch": 1.5366941780057997, "grad_norm": 0.17518217861652374, "learning_rate": 9.732917547767069e-06, "loss": 0.4543, "step": 6889 }, { "epoch": 1.536917242917689, "grad_norm": 0.17109224200248718, "learning_rate": 9.730564965510269e-06, "loss": 0.4401, "step": 6890 }, { "epoch": 1.5371403078295784, "grad_norm": 0.1737949699163437, "learning_rate": 9.728212398176476e-06, "loss": 0.4415, "step": 6891 }, { "epoch": 1.5373633727414677, "grad_norm": 0.16947439312934875, "learning_rate": 9.72585984589599e-06, "loss": 0.4723, "step": 6892 }, { "epoch": 1.5375864376533572, "grad_norm": 0.1718679517507553, "learning_rate": 9.723507308799109e-06, "loss": 0.4842, "step": 6893 }, { "epoch": 1.5378095025652465, "grad_norm": 0.1752775013446808, "learning_rate": 9.721154787016135e-06, "loss": 0.4461, "step": 6894 }, { "epoch": 1.5380325674771358, "grad_norm": 0.17731130123138428, "learning_rate": 9.71880228067736e-06, "loss": 0.4566, "step": 6895 }, { "epoch": 1.538255632389025, "grad_norm": 0.22506393492221832, "learning_rate": 9.716449789913087e-06, "loss": 0.4486, "step": 6896 }, { "epoch": 1.5384786973009146, "grad_norm": 0.17493079602718353, "learning_rate": 9.714097314853606e-06, "loss": 0.439, "step": 6897 }, { "epoch": 1.538701762212804, "grad_norm": 0.16993321478366852, "learning_rate": 9.711744855629218e-06, "loss": 0.4549, "step": 6898 }, { "epoch": 1.5389248271246934, "grad_norm": 0.17897512018680573, "learning_rate": 9.709392412370212e-06, "loss": 0.4452, "step": 6899 }, { "epoch": 1.5391478920365826, "grad_norm": 0.16903938353061676, "learning_rate": 9.707039985206878e-06, "loss": 0.4665, "step": 6900 }, { "epoch": 1.539370956948472, "grad_norm": 0.17775236070156097, "learning_rate": 9.704687574269515e-06, "loss": 0.4702, "step": 6901 }, { "epoch": 1.5395940218603612, "grad_norm": 0.17940306663513184, "learning_rate": 9.70233517968841e-06, "loss": 0.4671, "step": 6902 }, { "epoch": 1.5398170867722507, "grad_norm": 0.163747638463974, "learning_rate": 9.699982801593857e-06, "loss": 0.4533, "step": 6903 }, { "epoch": 1.5400401516841402, "grad_norm": 0.1675698310136795, "learning_rate": 9.697630440116141e-06, "loss": 0.4422, "step": 6904 }, { "epoch": 1.5402632165960295, "grad_norm": 0.1684923619031906, "learning_rate": 9.695278095385558e-06, "loss": 0.4439, "step": 6905 }, { "epoch": 1.5404862815079188, "grad_norm": 0.16581715643405914, "learning_rate": 9.692925767532386e-06, "loss": 0.4288, "step": 6906 }, { "epoch": 1.540709346419808, "grad_norm": 0.1779187023639679, "learning_rate": 9.690573456686919e-06, "loss": 0.4589, "step": 6907 }, { "epoch": 1.5409324113316976, "grad_norm": 0.17237865924835205, "learning_rate": 9.688221162979438e-06, "loss": 0.4595, "step": 6908 }, { "epoch": 1.5411554762435868, "grad_norm": 0.17078691720962524, "learning_rate": 9.685868886540233e-06, "loss": 0.4417, "step": 6909 }, { "epoch": 1.5413785411554763, "grad_norm": 0.16685281693935394, "learning_rate": 9.683516627499583e-06, "loss": 0.4311, "step": 6910 }, { "epoch": 1.5416016060673656, "grad_norm": 0.17877590656280518, "learning_rate": 9.681164385987776e-06, "loss": 0.4475, "step": 6911 }, { "epoch": 1.541824670979255, "grad_norm": 0.17893154919147491, "learning_rate": 9.678812162135087e-06, "loss": 0.4585, "step": 6912 }, { "epoch": 1.5420477358911442, "grad_norm": 0.17963162064552307, "learning_rate": 9.676459956071804e-06, "loss": 0.4643, "step": 6913 }, { "epoch": 1.5422708008030337, "grad_norm": 0.16504304111003876, "learning_rate": 9.674107767928202e-06, "loss": 0.4181, "step": 6914 }, { "epoch": 1.5424938657149232, "grad_norm": 0.17094488441944122, "learning_rate": 9.671755597834563e-06, "loss": 0.4739, "step": 6915 }, { "epoch": 1.5427169306268125, "grad_norm": 0.1811668872833252, "learning_rate": 9.669403445921162e-06, "loss": 0.4549, "step": 6916 }, { "epoch": 1.5429399955387018, "grad_norm": 0.17800799012184143, "learning_rate": 9.667051312318277e-06, "loss": 0.4655, "step": 6917 }, { "epoch": 1.543163060450591, "grad_norm": 0.1688285917043686, "learning_rate": 9.664699197156188e-06, "loss": 0.4442, "step": 6918 }, { "epoch": 1.5433861253624803, "grad_norm": 0.17787446081638336, "learning_rate": 9.662347100565163e-06, "loss": 0.4689, "step": 6919 }, { "epoch": 1.5436091902743698, "grad_norm": 0.1820237636566162, "learning_rate": 9.659995022675482e-06, "loss": 0.453, "step": 6920 }, { "epoch": 1.5438322551862593, "grad_norm": 0.18955709040164948, "learning_rate": 9.657642963617414e-06, "loss": 0.4653, "step": 6921 }, { "epoch": 1.5440553200981486, "grad_norm": 0.17111030220985413, "learning_rate": 9.655290923521235e-06, "loss": 0.4436, "step": 6922 }, { "epoch": 1.544278385010038, "grad_norm": 0.17172935605049133, "learning_rate": 9.65293890251721e-06, "loss": 0.4361, "step": 6923 }, { "epoch": 1.5445014499219272, "grad_norm": 0.18299220502376556, "learning_rate": 9.650586900735612e-06, "loss": 0.4775, "step": 6924 }, { "epoch": 1.5447245148338167, "grad_norm": 0.18452848494052887, "learning_rate": 9.648234918306709e-06, "loss": 0.4646, "step": 6925 }, { "epoch": 1.544947579745706, "grad_norm": 0.17104355990886688, "learning_rate": 9.645882955360771e-06, "loss": 0.454, "step": 6926 }, { "epoch": 1.5451706446575955, "grad_norm": 0.18376219272613525, "learning_rate": 9.643531012028061e-06, "loss": 0.4683, "step": 6927 }, { "epoch": 1.5453937095694847, "grad_norm": 0.17010129988193512, "learning_rate": 9.641179088438845e-06, "loss": 0.4417, "step": 6928 }, { "epoch": 1.545616774481374, "grad_norm": 0.167456716299057, "learning_rate": 9.63882718472339e-06, "loss": 0.4114, "step": 6929 }, { "epoch": 1.5458398393932633, "grad_norm": 0.17169076204299927, "learning_rate": 9.636475301011957e-06, "loss": 0.4526, "step": 6930 }, { "epoch": 1.5460629043051528, "grad_norm": 0.16744768619537354, "learning_rate": 9.634123437434807e-06, "loss": 0.4615, "step": 6931 }, { "epoch": 1.5462859692170423, "grad_norm": 0.1830861121416092, "learning_rate": 9.631771594122205e-06, "loss": 0.4554, "step": 6932 }, { "epoch": 1.5465090341289316, "grad_norm": 0.17061203718185425, "learning_rate": 9.629419771204405e-06, "loss": 0.4317, "step": 6933 }, { "epoch": 1.5467320990408209, "grad_norm": 0.1750810146331787, "learning_rate": 9.627067968811671e-06, "loss": 0.4648, "step": 6934 }, { "epoch": 1.5469551639527102, "grad_norm": 0.16664697229862213, "learning_rate": 9.624716187074258e-06, "loss": 0.4555, "step": 6935 }, { "epoch": 1.5471782288645994, "grad_norm": 0.17463943362236023, "learning_rate": 9.622364426122424e-06, "loss": 0.4334, "step": 6936 }, { "epoch": 1.547401293776489, "grad_norm": 0.182994544506073, "learning_rate": 9.620012686086423e-06, "loss": 0.4657, "step": 6937 }, { "epoch": 1.5476243586883784, "grad_norm": 0.18054361641407013, "learning_rate": 9.61766096709651e-06, "loss": 0.4595, "step": 6938 }, { "epoch": 1.5478474236002677, "grad_norm": 0.17179542779922485, "learning_rate": 9.615309269282938e-06, "loss": 0.473, "step": 6939 }, { "epoch": 1.548070488512157, "grad_norm": 0.17718704044818878, "learning_rate": 9.612957592775957e-06, "loss": 0.4708, "step": 6940 }, { "epoch": 1.5482935534240463, "grad_norm": 0.17120683193206787, "learning_rate": 9.610605937705823e-06, "loss": 0.4432, "step": 6941 }, { "epoch": 1.5485166183359358, "grad_norm": 0.17965561151504517, "learning_rate": 9.608254304202776e-06, "loss": 0.439, "step": 6942 }, { "epoch": 1.548739683247825, "grad_norm": 0.17135334014892578, "learning_rate": 9.605902692397074e-06, "loss": 0.4597, "step": 6943 }, { "epoch": 1.5489627481597146, "grad_norm": 0.1764613837003708, "learning_rate": 9.603551102418958e-06, "loss": 0.4478, "step": 6944 }, { "epoch": 1.5491858130716039, "grad_norm": 0.16766582429409027, "learning_rate": 9.601199534398678e-06, "loss": 0.425, "step": 6945 }, { "epoch": 1.5494088779834931, "grad_norm": 0.1771164983510971, "learning_rate": 9.598847988466472e-06, "loss": 0.457, "step": 6946 }, { "epoch": 1.5496319428953824, "grad_norm": 0.1649523228406906, "learning_rate": 9.596496464752593e-06, "loss": 0.4563, "step": 6947 }, { "epoch": 1.549855007807272, "grad_norm": 0.17230331897735596, "learning_rate": 9.594144963387274e-06, "loss": 0.4587, "step": 6948 }, { "epoch": 1.5500780727191614, "grad_norm": 0.17549771070480347, "learning_rate": 9.591793484500762e-06, "loss": 0.4719, "step": 6949 }, { "epoch": 1.5503011376310507, "grad_norm": 0.17885257303714752, "learning_rate": 9.589442028223292e-06, "loss": 0.4479, "step": 6950 }, { "epoch": 1.55052420254294, "grad_norm": 0.1839398443698883, "learning_rate": 9.587090594685108e-06, "loss": 0.4631, "step": 6951 }, { "epoch": 1.5507472674548293, "grad_norm": 0.17899379134178162, "learning_rate": 9.584739184016442e-06, "loss": 0.4705, "step": 6952 }, { "epoch": 1.5509703323667186, "grad_norm": 0.17247112095355988, "learning_rate": 9.582387796347534e-06, "loss": 0.4653, "step": 6953 }, { "epoch": 1.551193397278608, "grad_norm": 0.17476417124271393, "learning_rate": 9.580036431808614e-06, "loss": 0.447, "step": 6954 }, { "epoch": 1.5514164621904976, "grad_norm": 0.17558352649211884, "learning_rate": 9.57768509052992e-06, "loss": 0.4607, "step": 6955 }, { "epoch": 1.5516395271023868, "grad_norm": 0.1909688413143158, "learning_rate": 9.575333772641682e-06, "loss": 0.4619, "step": 6956 }, { "epoch": 1.5518625920142761, "grad_norm": 0.1743474304676056, "learning_rate": 9.572982478274128e-06, "loss": 0.443, "step": 6957 }, { "epoch": 1.5520856569261654, "grad_norm": 0.18644817173480988, "learning_rate": 9.570631207557495e-06, "loss": 0.4533, "step": 6958 }, { "epoch": 1.552308721838055, "grad_norm": 0.17602680623531342, "learning_rate": 9.568279960622004e-06, "loss": 0.4394, "step": 6959 }, { "epoch": 1.5525317867499442, "grad_norm": 0.17043206095695496, "learning_rate": 9.565928737597886e-06, "loss": 0.4551, "step": 6960 }, { "epoch": 1.5527548516618337, "grad_norm": 0.18989066779613495, "learning_rate": 9.563577538615363e-06, "loss": 0.471, "step": 6961 }, { "epoch": 1.552977916573723, "grad_norm": 0.19321848452091217, "learning_rate": 9.561226363804663e-06, "loss": 0.4739, "step": 6962 }, { "epoch": 1.5532009814856123, "grad_norm": 0.17869152128696442, "learning_rate": 9.558875213296006e-06, "loss": 0.4409, "step": 6963 }, { "epoch": 1.5534240463975015, "grad_norm": 0.17526346445083618, "learning_rate": 9.556524087219616e-06, "loss": 0.4514, "step": 6964 }, { "epoch": 1.553647111309391, "grad_norm": 0.17142999172210693, "learning_rate": 9.55417298570571e-06, "loss": 0.4427, "step": 6965 }, { "epoch": 1.5538701762212805, "grad_norm": 0.17005646228790283, "learning_rate": 9.551821908884507e-06, "loss": 0.4434, "step": 6966 }, { "epoch": 1.5540932411331698, "grad_norm": 0.1687459945678711, "learning_rate": 9.549470856886228e-06, "loss": 0.4651, "step": 6967 }, { "epoch": 1.554316306045059, "grad_norm": 0.1726951152086258, "learning_rate": 9.547119829841088e-06, "loss": 0.4582, "step": 6968 }, { "epoch": 1.5545393709569484, "grad_norm": 0.17231762409210205, "learning_rate": 9.544768827879297e-06, "loss": 0.4472, "step": 6969 }, { "epoch": 1.5547624358688377, "grad_norm": 0.179609015583992, "learning_rate": 9.542417851131076e-06, "loss": 0.4563, "step": 6970 }, { "epoch": 1.5549855007807272, "grad_norm": 0.17300112545490265, "learning_rate": 9.540066899726627e-06, "loss": 0.4245, "step": 6971 }, { "epoch": 1.5552085656926167, "grad_norm": 0.19064456224441528, "learning_rate": 9.537715973796172e-06, "loss": 0.4519, "step": 6972 }, { "epoch": 1.555431630604506, "grad_norm": 0.1847352236509323, "learning_rate": 9.535365073469911e-06, "loss": 0.4784, "step": 6973 }, { "epoch": 1.5556546955163952, "grad_norm": 0.18160337209701538, "learning_rate": 9.533014198878057e-06, "loss": 0.4533, "step": 6974 }, { "epoch": 1.5558777604282845, "grad_norm": 0.1749866008758545, "learning_rate": 9.530663350150812e-06, "loss": 0.4607, "step": 6975 }, { "epoch": 1.556100825340174, "grad_norm": 0.1792685091495514, "learning_rate": 9.528312527418384e-06, "loss": 0.4598, "step": 6976 }, { "epoch": 1.5563238902520633, "grad_norm": 0.16535772383213043, "learning_rate": 9.525961730810974e-06, "loss": 0.4733, "step": 6977 }, { "epoch": 1.5565469551639528, "grad_norm": 0.17856188118457794, "learning_rate": 9.523610960458786e-06, "loss": 0.4724, "step": 6978 }, { "epoch": 1.556770020075842, "grad_norm": 0.1673804074525833, "learning_rate": 9.521260216492025e-06, "loss": 0.4281, "step": 6979 }, { "epoch": 1.5569930849877314, "grad_norm": 0.17213106155395508, "learning_rate": 9.518909499040878e-06, "loss": 0.4652, "step": 6980 }, { "epoch": 1.5572161498996206, "grad_norm": 0.1793312132358551, "learning_rate": 9.51655880823555e-06, "loss": 0.4645, "step": 6981 }, { "epoch": 1.5574392148115102, "grad_norm": 0.17523737251758575, "learning_rate": 9.514208144206237e-06, "loss": 0.4459, "step": 6982 }, { "epoch": 1.5576622797233997, "grad_norm": 0.17327353358268738, "learning_rate": 9.511857507083132e-06, "loss": 0.4531, "step": 6983 }, { "epoch": 1.557885344635289, "grad_norm": 0.3019886016845703, "learning_rate": 9.509506896996432e-06, "loss": 0.4868, "step": 6984 }, { "epoch": 1.5581084095471782, "grad_norm": 0.16475124657154083, "learning_rate": 9.50715631407632e-06, "loss": 0.4522, "step": 6985 }, { "epoch": 1.5583314744590675, "grad_norm": 0.17215771973133087, "learning_rate": 9.504805758452996e-06, "loss": 0.4741, "step": 6986 }, { "epoch": 1.558554539370957, "grad_norm": 0.17033734917640686, "learning_rate": 9.502455230256641e-06, "loss": 0.4406, "step": 6987 }, { "epoch": 1.5587776042828463, "grad_norm": 0.17442695796489716, "learning_rate": 9.500104729617447e-06, "loss": 0.4473, "step": 6988 }, { "epoch": 1.5590006691947358, "grad_norm": 0.17998719215393066, "learning_rate": 9.497754256665596e-06, "loss": 0.4747, "step": 6989 }, { "epoch": 1.559223734106625, "grad_norm": 0.18134324252605438, "learning_rate": 9.495403811531274e-06, "loss": 0.457, "step": 6990 }, { "epoch": 1.5594467990185144, "grad_norm": 0.16917896270751953, "learning_rate": 9.493053394344663e-06, "loss": 0.4701, "step": 6991 }, { "epoch": 1.5596698639304036, "grad_norm": 0.18709082901477814, "learning_rate": 9.490703005235945e-06, "loss": 0.4577, "step": 6992 }, { "epoch": 1.5598929288422931, "grad_norm": 0.17913807928562164, "learning_rate": 9.488352644335296e-06, "loss": 0.4391, "step": 6993 }, { "epoch": 1.5601159937541824, "grad_norm": 0.24650318920612335, "learning_rate": 9.486002311772898e-06, "loss": 0.4355, "step": 6994 }, { "epoch": 1.560339058666072, "grad_norm": 0.17876996099948883, "learning_rate": 9.483652007678924e-06, "loss": 0.4741, "step": 6995 }, { "epoch": 1.5605621235779612, "grad_norm": 0.1874217391014099, "learning_rate": 9.48130173218355e-06, "loss": 0.4516, "step": 6996 }, { "epoch": 1.5607851884898505, "grad_norm": 0.1787550449371338, "learning_rate": 9.478951485416948e-06, "loss": 0.4784, "step": 6997 }, { "epoch": 1.5610082534017398, "grad_norm": 0.17699971795082092, "learning_rate": 9.476601267509293e-06, "loss": 0.4708, "step": 6998 }, { "epoch": 1.5612313183136293, "grad_norm": 0.17713846266269684, "learning_rate": 9.474251078590749e-06, "loss": 0.4404, "step": 6999 }, { "epoch": 1.5614543832255188, "grad_norm": 0.1823769062757492, "learning_rate": 9.47190091879149e-06, "loss": 0.428, "step": 7000 }, { "epoch": 1.561677448137408, "grad_norm": 0.17455317080020905, "learning_rate": 9.469550788241678e-06, "loss": 0.4416, "step": 7001 }, { "epoch": 1.5619005130492973, "grad_norm": 0.164621502161026, "learning_rate": 9.467200687071478e-06, "loss": 0.4429, "step": 7002 }, { "epoch": 1.5621235779611866, "grad_norm": 0.1848084181547165, "learning_rate": 9.464850615411059e-06, "loss": 0.5004, "step": 7003 }, { "epoch": 1.5623466428730761, "grad_norm": 0.20320458710193634, "learning_rate": 9.462500573390578e-06, "loss": 0.4704, "step": 7004 }, { "epoch": 1.5625697077849654, "grad_norm": 0.16972646117210388, "learning_rate": 9.460150561140196e-06, "loss": 0.451, "step": 7005 }, { "epoch": 1.562792772696855, "grad_norm": 0.16521897912025452, "learning_rate": 9.457800578790072e-06, "loss": 0.4457, "step": 7006 }, { "epoch": 1.5630158376087442, "grad_norm": 0.18121238052845, "learning_rate": 9.455450626470363e-06, "loss": 0.4887, "step": 7007 }, { "epoch": 1.5632389025206335, "grad_norm": 0.17698867619037628, "learning_rate": 9.453100704311222e-06, "loss": 0.4559, "step": 7008 }, { "epoch": 1.5634619674325227, "grad_norm": 0.18639512360095978, "learning_rate": 9.450750812442808e-06, "loss": 0.4504, "step": 7009 }, { "epoch": 1.5636850323444123, "grad_norm": 0.17181695997714996, "learning_rate": 9.448400950995265e-06, "loss": 0.4383, "step": 7010 }, { "epoch": 1.5639080972563015, "grad_norm": 0.1778135597705841, "learning_rate": 9.446051120098749e-06, "loss": 0.4787, "step": 7011 }, { "epoch": 1.564131162168191, "grad_norm": 0.17463596165180206, "learning_rate": 9.443701319883404e-06, "loss": 0.4513, "step": 7012 }, { "epoch": 1.5643542270800803, "grad_norm": 0.1755073219537735, "learning_rate": 9.441351550479382e-06, "loss": 0.472, "step": 7013 }, { "epoch": 1.5645772919919696, "grad_norm": 0.17314688861370087, "learning_rate": 9.439001812016824e-06, "loss": 0.4824, "step": 7014 }, { "epoch": 1.5648003569038589, "grad_norm": 0.17931319773197174, "learning_rate": 9.436652104625875e-06, "loss": 0.487, "step": 7015 }, { "epoch": 1.5650234218157484, "grad_norm": 0.1748175173997879, "learning_rate": 9.434302428436674e-06, "loss": 0.4322, "step": 7016 }, { "epoch": 1.5652464867276379, "grad_norm": 0.1719309538602829, "learning_rate": 9.431952783579365e-06, "loss": 0.4584, "step": 7017 }, { "epoch": 1.5654695516395272, "grad_norm": 0.1643352508544922, "learning_rate": 9.429603170184085e-06, "loss": 0.4204, "step": 7018 }, { "epoch": 1.5656926165514164, "grad_norm": 0.17811031639575958, "learning_rate": 9.427253588380968e-06, "loss": 0.4827, "step": 7019 }, { "epoch": 1.5659156814633057, "grad_norm": 0.18726174533367157, "learning_rate": 9.42490403830015e-06, "loss": 0.4466, "step": 7020 }, { "epoch": 1.5661387463751952, "grad_norm": 0.1743079274892807, "learning_rate": 9.422554520071763e-06, "loss": 0.4595, "step": 7021 }, { "epoch": 1.5663618112870845, "grad_norm": 0.16648052632808685, "learning_rate": 9.42020503382594e-06, "loss": 0.4399, "step": 7022 }, { "epoch": 1.566584876198974, "grad_norm": 0.1800967901945114, "learning_rate": 9.417855579692807e-06, "loss": 0.4602, "step": 7023 }, { "epoch": 1.5668079411108633, "grad_norm": 0.17495980858802795, "learning_rate": 9.415506157802497e-06, "loss": 0.45, "step": 7024 }, { "epoch": 1.5670310060227526, "grad_norm": 0.1846977323293686, "learning_rate": 9.41315676828513e-06, "loss": 0.4677, "step": 7025 }, { "epoch": 1.5672540709346419, "grad_norm": 0.1642792671918869, "learning_rate": 9.410807411270834e-06, "loss": 0.452, "step": 7026 }, { "epoch": 1.5674771358465314, "grad_norm": 0.18164919316768646, "learning_rate": 9.40845808688973e-06, "loss": 0.457, "step": 7027 }, { "epoch": 1.5677002007584206, "grad_norm": 0.18115800619125366, "learning_rate": 9.40610879527194e-06, "loss": 0.4561, "step": 7028 }, { "epoch": 1.5679232656703102, "grad_norm": 0.17622902989387512, "learning_rate": 9.403759536547577e-06, "loss": 0.4436, "step": 7029 }, { "epoch": 1.5681463305821994, "grad_norm": 0.17321671545505524, "learning_rate": 9.401410310846762e-06, "loss": 0.4635, "step": 7030 }, { "epoch": 1.5683693954940887, "grad_norm": 0.16843074560165405, "learning_rate": 9.39906111829961e-06, "loss": 0.4266, "step": 7031 }, { "epoch": 1.568592460405978, "grad_norm": 0.17982402443885803, "learning_rate": 9.396711959036234e-06, "loss": 0.4572, "step": 7032 }, { "epoch": 1.5688155253178675, "grad_norm": 0.16617652773857117, "learning_rate": 9.394362833186742e-06, "loss": 0.4548, "step": 7033 }, { "epoch": 1.569038590229757, "grad_norm": 0.18477341532707214, "learning_rate": 9.39201374088125e-06, "loss": 0.4708, "step": 7034 }, { "epoch": 1.5692616551416463, "grad_norm": 0.1808331459760666, "learning_rate": 9.389664682249859e-06, "loss": 0.4721, "step": 7035 }, { "epoch": 1.5694847200535356, "grad_norm": 0.16547119617462158, "learning_rate": 9.387315657422677e-06, "loss": 0.4477, "step": 7036 }, { "epoch": 1.5697077849654248, "grad_norm": 0.19338512420654297, "learning_rate": 9.384966666529807e-06, "loss": 0.4559, "step": 7037 }, { "epoch": 1.5699308498773143, "grad_norm": 0.1708751618862152, "learning_rate": 9.382617709701355e-06, "loss": 0.4593, "step": 7038 }, { "epoch": 1.5701539147892036, "grad_norm": 0.17655450105667114, "learning_rate": 9.380268787067416e-06, "loss": 0.452, "step": 7039 }, { "epoch": 1.5703769797010931, "grad_norm": 0.1719549298286438, "learning_rate": 9.377919898758089e-06, "loss": 0.452, "step": 7040 }, { "epoch": 1.5706000446129824, "grad_norm": 0.17467504739761353, "learning_rate": 9.375571044903469e-06, "loss": 0.4699, "step": 7041 }, { "epoch": 1.5708231095248717, "grad_norm": 0.17232942581176758, "learning_rate": 9.373222225633655e-06, "loss": 0.4696, "step": 7042 }, { "epoch": 1.571046174436761, "grad_norm": 0.1764511615037918, "learning_rate": 9.370873441078738e-06, "loss": 0.4655, "step": 7043 }, { "epoch": 1.5712692393486505, "grad_norm": 0.16992317140102386, "learning_rate": 9.368524691368804e-06, "loss": 0.4342, "step": 7044 }, { "epoch": 1.5714923042605398, "grad_norm": 0.1786929816007614, "learning_rate": 9.366175976633949e-06, "loss": 0.4513, "step": 7045 }, { "epoch": 1.5717153691724293, "grad_norm": 0.1684352606534958, "learning_rate": 9.363827297004252e-06, "loss": 0.4698, "step": 7046 }, { "epoch": 1.5719384340843185, "grad_norm": 0.17656061053276062, "learning_rate": 9.3614786526098e-06, "loss": 0.4762, "step": 7047 }, { "epoch": 1.5721614989962078, "grad_norm": 0.1755649596452713, "learning_rate": 9.359130043580679e-06, "loss": 0.471, "step": 7048 }, { "epoch": 1.572384563908097, "grad_norm": 0.1698499619960785, "learning_rate": 9.356781470046968e-06, "loss": 0.4455, "step": 7049 }, { "epoch": 1.5726076288199866, "grad_norm": 0.1733773797750473, "learning_rate": 9.354432932138743e-06, "loss": 0.4295, "step": 7050 }, { "epoch": 1.5728306937318761, "grad_norm": 0.1755225956439972, "learning_rate": 9.352084429986085e-06, "loss": 0.4365, "step": 7051 }, { "epoch": 1.5730537586437654, "grad_norm": 0.16997897624969482, "learning_rate": 9.349735963719065e-06, "loss": 0.439, "step": 7052 }, { "epoch": 1.5732768235556547, "grad_norm": 0.17118623852729797, "learning_rate": 9.34738753346776e-06, "loss": 0.4616, "step": 7053 }, { "epoch": 1.573499888467544, "grad_norm": 0.1747768670320511, "learning_rate": 9.345039139362235e-06, "loss": 0.4594, "step": 7054 }, { "epoch": 1.5737229533794335, "grad_norm": 0.17374879121780396, "learning_rate": 9.342690781532566e-06, "loss": 0.461, "step": 7055 }, { "epoch": 1.5739460182913227, "grad_norm": 0.18289963901042938, "learning_rate": 9.340342460108813e-06, "loss": 0.4668, "step": 7056 }, { "epoch": 1.5741690832032122, "grad_norm": 0.17302124202251434, "learning_rate": 9.337994175221047e-06, "loss": 0.4441, "step": 7057 }, { "epoch": 1.5743921481151015, "grad_norm": 0.17973129451274872, "learning_rate": 9.335645926999324e-06, "loss": 0.4488, "step": 7058 }, { "epoch": 1.5746152130269908, "grad_norm": 0.17324143648147583, "learning_rate": 9.333297715573713e-06, "loss": 0.4394, "step": 7059 }, { "epoch": 1.57483827793888, "grad_norm": 0.17785955965518951, "learning_rate": 9.330949541074265e-06, "loss": 0.4533, "step": 7060 }, { "epoch": 1.5750613428507696, "grad_norm": 0.17939788103103638, "learning_rate": 9.328601403631044e-06, "loss": 0.4592, "step": 7061 }, { "epoch": 1.575284407762659, "grad_norm": 0.18555541336536407, "learning_rate": 9.326253303374099e-06, "loss": 0.4838, "step": 7062 }, { "epoch": 1.5755074726745484, "grad_norm": 0.17354725301265717, "learning_rate": 9.323905240433481e-06, "loss": 0.4543, "step": 7063 }, { "epoch": 1.5757305375864377, "grad_norm": 0.18157054483890533, "learning_rate": 9.321557214939247e-06, "loss": 0.444, "step": 7064 }, { "epoch": 1.575953602498327, "grad_norm": 0.1770789921283722, "learning_rate": 9.319209227021439e-06, "loss": 0.4877, "step": 7065 }, { "epoch": 1.5761766674102162, "grad_norm": 0.18698160350322723, "learning_rate": 9.316861276810105e-06, "loss": 0.453, "step": 7066 }, { "epoch": 1.5763997323221057, "grad_norm": 0.1865304410457611, "learning_rate": 9.314513364435295e-06, "loss": 0.4649, "step": 7067 }, { "epoch": 1.5766227972339952, "grad_norm": 0.18025344610214233, "learning_rate": 9.312165490027044e-06, "loss": 0.4874, "step": 7068 }, { "epoch": 1.5768458621458845, "grad_norm": 0.17585685849189758, "learning_rate": 9.309817653715395e-06, "loss": 0.4716, "step": 7069 }, { "epoch": 1.5770689270577738, "grad_norm": 0.17630046606063843, "learning_rate": 9.307469855630386e-06, "loss": 0.4836, "step": 7070 }, { "epoch": 1.577291991969663, "grad_norm": 0.17560921609401703, "learning_rate": 9.305122095902055e-06, "loss": 0.4392, "step": 7071 }, { "epoch": 1.5775150568815526, "grad_norm": 0.17391790449619293, "learning_rate": 9.30277437466043e-06, "loss": 0.4587, "step": 7072 }, { "epoch": 1.5777381217934419, "grad_norm": 0.17293858528137207, "learning_rate": 9.30042669203555e-06, "loss": 0.4473, "step": 7073 }, { "epoch": 1.5779611867053314, "grad_norm": 0.17845462262630463, "learning_rate": 9.298079048157434e-06, "loss": 0.4684, "step": 7074 }, { "epoch": 1.5781842516172206, "grad_norm": 0.18190234899520874, "learning_rate": 9.295731443156121e-06, "loss": 0.4561, "step": 7075 }, { "epoch": 1.57840731652911, "grad_norm": 0.1686115711927414, "learning_rate": 9.293383877161628e-06, "loss": 0.4418, "step": 7076 }, { "epoch": 1.5786303814409992, "grad_norm": 0.17196792364120483, "learning_rate": 9.291036350303982e-06, "loss": 0.4569, "step": 7077 }, { "epoch": 1.5788534463528887, "grad_norm": 0.17236146330833435, "learning_rate": 9.288688862713201e-06, "loss": 0.4466, "step": 7078 }, { "epoch": 1.5790765112647782, "grad_norm": 0.1745366007089615, "learning_rate": 9.286341414519307e-06, "loss": 0.4356, "step": 7079 }, { "epoch": 1.5792995761766675, "grad_norm": 0.1981036514043808, "learning_rate": 9.283994005852313e-06, "loss": 0.4698, "step": 7080 }, { "epoch": 1.5795226410885568, "grad_norm": 0.17840775847434998, "learning_rate": 9.281646636842235e-06, "loss": 0.4553, "step": 7081 }, { "epoch": 1.579745706000446, "grad_norm": 0.17592334747314453, "learning_rate": 9.279299307619085e-06, "loss": 0.4468, "step": 7082 }, { "epoch": 1.5799687709123353, "grad_norm": 0.17644402384757996, "learning_rate": 9.276952018312874e-06, "loss": 0.4373, "step": 7083 }, { "epoch": 1.5801918358242248, "grad_norm": 0.18005765974521637, "learning_rate": 9.274604769053605e-06, "loss": 0.4825, "step": 7084 }, { "epoch": 1.5804149007361143, "grad_norm": 0.17240746319293976, "learning_rate": 9.272257559971291e-06, "loss": 0.4562, "step": 7085 }, { "epoch": 1.5806379656480036, "grad_norm": 0.171810120344162, "learning_rate": 9.269910391195929e-06, "loss": 0.4314, "step": 7086 }, { "epoch": 1.580861030559893, "grad_norm": 0.33604612946510315, "learning_rate": 9.26756326285752e-06, "loss": 0.4709, "step": 7087 }, { "epoch": 1.5810840954717822, "grad_norm": 0.18000468611717224, "learning_rate": 9.265216175086068e-06, "loss": 0.4858, "step": 7088 }, { "epoch": 1.5813071603836717, "grad_norm": 0.17121154069900513, "learning_rate": 9.262869128011565e-06, "loss": 0.4492, "step": 7089 }, { "epoch": 1.581530225295561, "grad_norm": 0.1873648315668106, "learning_rate": 9.260522121764007e-06, "loss": 0.4501, "step": 7090 }, { "epoch": 1.5817532902074505, "grad_norm": 0.17154662311077118, "learning_rate": 9.258175156473383e-06, "loss": 0.4237, "step": 7091 }, { "epoch": 1.5819763551193398, "grad_norm": 0.17864510416984558, "learning_rate": 9.255828232269689e-06, "loss": 0.4674, "step": 7092 }, { "epoch": 1.582199420031229, "grad_norm": 0.18545754253864288, "learning_rate": 9.253481349282906e-06, "loss": 0.4657, "step": 7093 }, { "epoch": 1.5824224849431183, "grad_norm": 0.18193696439266205, "learning_rate": 9.251134507643022e-06, "loss": 0.4538, "step": 7094 }, { "epoch": 1.5826455498550078, "grad_norm": 0.17298151552677155, "learning_rate": 9.248787707480018e-06, "loss": 0.4696, "step": 7095 }, { "epoch": 1.5828686147668973, "grad_norm": 0.17081047594547272, "learning_rate": 9.24644094892388e-06, "loss": 0.4593, "step": 7096 }, { "epoch": 1.5830916796787866, "grad_norm": 0.18091976642608643, "learning_rate": 9.244094232104578e-06, "loss": 0.4611, "step": 7097 }, { "epoch": 1.583314744590676, "grad_norm": 0.29800283908843994, "learning_rate": 9.241747557152096e-06, "loss": 0.4455, "step": 7098 }, { "epoch": 1.5835378095025652, "grad_norm": 0.16630631685256958, "learning_rate": 9.239400924196402e-06, "loss": 0.4664, "step": 7099 }, { "epoch": 1.5837608744144545, "grad_norm": 0.17668049037456512, "learning_rate": 9.23705433336747e-06, "loss": 0.4666, "step": 7100 }, { "epoch": 1.583983939326344, "grad_norm": 0.17897532880306244, "learning_rate": 9.234707784795266e-06, "loss": 0.4586, "step": 7101 }, { "epoch": 1.5842070042382335, "grad_norm": 0.18220888078212738, "learning_rate": 9.232361278609761e-06, "loss": 0.459, "step": 7102 }, { "epoch": 1.5844300691501227, "grad_norm": 0.17005829513072968, "learning_rate": 9.230014814940917e-06, "loss": 0.4506, "step": 7103 }, { "epoch": 1.584653134062012, "grad_norm": 0.17794610559940338, "learning_rate": 9.227668393918695e-06, "loss": 0.4894, "step": 7104 }, { "epoch": 1.5848761989739013, "grad_norm": 0.1713520735502243, "learning_rate": 9.225322015673055e-06, "loss": 0.4762, "step": 7105 }, { "epoch": 1.5850992638857908, "grad_norm": 0.17363131046295166, "learning_rate": 9.222975680333956e-06, "loss": 0.453, "step": 7106 }, { "epoch": 1.58532232879768, "grad_norm": 0.1799049973487854, "learning_rate": 9.22062938803135e-06, "loss": 0.4692, "step": 7107 }, { "epoch": 1.5855453937095696, "grad_norm": 0.1882503479719162, "learning_rate": 9.218283138895192e-06, "loss": 0.4721, "step": 7108 }, { "epoch": 1.5857684586214589, "grad_norm": 0.19091439247131348, "learning_rate": 9.215936933055431e-06, "loss": 0.471, "step": 7109 }, { "epoch": 1.5859915235333482, "grad_norm": 0.1782108098268509, "learning_rate": 9.213590770642014e-06, "loss": 0.4753, "step": 7110 }, { "epoch": 1.5862145884452374, "grad_norm": 0.17361801862716675, "learning_rate": 9.211244651784888e-06, "loss": 0.4706, "step": 7111 }, { "epoch": 1.586437653357127, "grad_norm": 0.18161118030548096, "learning_rate": 9.208898576613993e-06, "loss": 0.457, "step": 7112 }, { "epoch": 1.5866607182690164, "grad_norm": 0.16798540949821472, "learning_rate": 9.206552545259274e-06, "loss": 0.4232, "step": 7113 }, { "epoch": 1.5868837831809057, "grad_norm": 0.1804846078157425, "learning_rate": 9.204206557850661e-06, "loss": 0.4703, "step": 7114 }, { "epoch": 1.587106848092795, "grad_norm": 0.17677569389343262, "learning_rate": 9.201860614518098e-06, "loss": 0.4451, "step": 7115 }, { "epoch": 1.5873299130046843, "grad_norm": 0.18691909313201904, "learning_rate": 9.199514715391514e-06, "loss": 0.4525, "step": 7116 }, { "epoch": 1.5875529779165736, "grad_norm": 0.1880151480436325, "learning_rate": 9.197168860600843e-06, "loss": 0.4887, "step": 7117 }, { "epoch": 1.587776042828463, "grad_norm": 0.18004381656646729, "learning_rate": 9.194823050276007e-06, "loss": 0.4763, "step": 7118 }, { "epoch": 1.5879991077403526, "grad_norm": 0.17934048175811768, "learning_rate": 9.192477284546937e-06, "loss": 0.4896, "step": 7119 }, { "epoch": 1.5882221726522419, "grad_norm": 0.17603585124015808, "learning_rate": 9.190131563543551e-06, "loss": 0.4514, "step": 7120 }, { "epoch": 1.5884452375641311, "grad_norm": 0.1763220578432083, "learning_rate": 9.187785887395778e-06, "loss": 0.4501, "step": 7121 }, { "epoch": 1.5886683024760204, "grad_norm": 0.16422604024410248, "learning_rate": 9.18544025623353e-06, "loss": 0.4382, "step": 7122 }, { "epoch": 1.58889136738791, "grad_norm": 0.17664875090122223, "learning_rate": 9.183094670186724e-06, "loss": 0.4802, "step": 7123 }, { "epoch": 1.5891144322997992, "grad_norm": 0.1742987334728241, "learning_rate": 9.180749129385273e-06, "loss": 0.4307, "step": 7124 }, { "epoch": 1.5893374972116887, "grad_norm": 0.16630713641643524, "learning_rate": 9.17840363395909e-06, "loss": 0.4245, "step": 7125 }, { "epoch": 1.589560562123578, "grad_norm": 0.17244166135787964, "learning_rate": 9.176058184038081e-06, "loss": 0.4488, "step": 7126 }, { "epoch": 1.5897836270354673, "grad_norm": 0.1764003038406372, "learning_rate": 9.17371277975215e-06, "loss": 0.4711, "step": 7127 }, { "epoch": 1.5900066919473566, "grad_norm": 0.18569590151309967, "learning_rate": 9.171367421231207e-06, "loss": 0.4765, "step": 7128 }, { "epoch": 1.590229756859246, "grad_norm": 0.19138166308403015, "learning_rate": 9.169022108605147e-06, "loss": 0.4742, "step": 7129 }, { "epoch": 1.5904528217711356, "grad_norm": 0.17266172170639038, "learning_rate": 9.16667684200387e-06, "loss": 0.4525, "step": 7130 }, { "epoch": 1.5906758866830248, "grad_norm": 0.17976059019565582, "learning_rate": 9.16433162155727e-06, "loss": 0.4669, "step": 7131 }, { "epoch": 1.5908989515949141, "grad_norm": 0.16930918395519257, "learning_rate": 9.161986447395244e-06, "loss": 0.4463, "step": 7132 }, { "epoch": 1.5911220165068034, "grad_norm": 0.185069739818573, "learning_rate": 9.159641319647676e-06, "loss": 0.4553, "step": 7133 }, { "epoch": 1.5913450814186927, "grad_norm": 0.17120788991451263, "learning_rate": 9.157296238444462e-06, "loss": 0.4564, "step": 7134 }, { "epoch": 1.5915681463305822, "grad_norm": 0.18028856813907623, "learning_rate": 9.15495120391548e-06, "loss": 0.4903, "step": 7135 }, { "epoch": 1.5917912112424717, "grad_norm": 0.18500107526779175, "learning_rate": 9.152606216190619e-06, "loss": 0.4839, "step": 7136 }, { "epoch": 1.592014276154361, "grad_norm": 0.17206281423568726, "learning_rate": 9.150261275399752e-06, "loss": 0.4445, "step": 7137 }, { "epoch": 1.5922373410662503, "grad_norm": 0.19422174990177155, "learning_rate": 9.147916381672763e-06, "loss": 0.4412, "step": 7138 }, { "epoch": 1.5924604059781395, "grad_norm": 0.17535541951656342, "learning_rate": 9.145571535139524e-06, "loss": 0.4682, "step": 7139 }, { "epoch": 1.592683470890029, "grad_norm": 0.17184211313724518, "learning_rate": 9.14322673592991e-06, "loss": 0.4383, "step": 7140 }, { "epoch": 1.5929065358019183, "grad_norm": 0.1792580634355545, "learning_rate": 9.140881984173786e-06, "loss": 0.4486, "step": 7141 }, { "epoch": 1.5931296007138078, "grad_norm": 0.17893850803375244, "learning_rate": 9.138537280001025e-06, "loss": 0.4494, "step": 7142 }, { "epoch": 1.593352665625697, "grad_norm": 0.17815670371055603, "learning_rate": 9.136192623541487e-06, "loss": 0.46, "step": 7143 }, { "epoch": 1.5935757305375864, "grad_norm": 0.17325416207313538, "learning_rate": 9.133848014925032e-06, "loss": 0.4582, "step": 7144 }, { "epoch": 1.5937987954494757, "grad_norm": 0.1770549863576889, "learning_rate": 9.131503454281526e-06, "loss": 0.4393, "step": 7145 }, { "epoch": 1.5940218603613652, "grad_norm": 0.1701727956533432, "learning_rate": 9.129158941740818e-06, "loss": 0.4728, "step": 7146 }, { "epoch": 1.5942449252732547, "grad_norm": 0.1813841164112091, "learning_rate": 9.12681447743277e-06, "loss": 0.4432, "step": 7147 }, { "epoch": 1.594467990185144, "grad_norm": 0.17347143590450287, "learning_rate": 9.124470061487225e-06, "loss": 0.4405, "step": 7148 }, { "epoch": 1.5946910550970332, "grad_norm": 0.18103331327438354, "learning_rate": 9.122125694034039e-06, "loss": 0.4811, "step": 7149 }, { "epoch": 1.5949141200089225, "grad_norm": 0.17634287476539612, "learning_rate": 9.11978137520305e-06, "loss": 0.4625, "step": 7150 }, { "epoch": 1.5951371849208118, "grad_norm": 0.16531561315059662, "learning_rate": 9.117437105124107e-06, "loss": 0.4206, "step": 7151 }, { "epoch": 1.5953602498327013, "grad_norm": 0.1829378753900528, "learning_rate": 9.11509288392705e-06, "loss": 0.4665, "step": 7152 }, { "epoch": 1.5955833147445908, "grad_norm": 0.17331163585186005, "learning_rate": 9.112748711741712e-06, "loss": 0.4393, "step": 7153 }, { "epoch": 1.59580637965648, "grad_norm": 0.1706630289554596, "learning_rate": 9.110404588697935e-06, "loss": 0.46, "step": 7154 }, { "epoch": 1.5960294445683694, "grad_norm": 0.16561025381088257, "learning_rate": 9.108060514925546e-06, "loss": 0.4287, "step": 7155 }, { "epoch": 1.5962525094802587, "grad_norm": 0.17239882051944733, "learning_rate": 9.105716490554377e-06, "loss": 0.466, "step": 7156 }, { "epoch": 1.5964755743921482, "grad_norm": 0.16943053901195526, "learning_rate": 9.103372515714252e-06, "loss": 0.4275, "step": 7157 }, { "epoch": 1.5966986393040374, "grad_norm": 0.1701379269361496, "learning_rate": 9.101028590534999e-06, "loss": 0.4459, "step": 7158 }, { "epoch": 1.596921704215927, "grad_norm": 0.18113826215267181, "learning_rate": 9.098684715146435e-06, "loss": 0.454, "step": 7159 }, { "epoch": 1.5971447691278162, "grad_norm": 0.19422270357608795, "learning_rate": 9.096340889678384e-06, "loss": 0.4594, "step": 7160 }, { "epoch": 1.5973678340397055, "grad_norm": 0.18085762858390808, "learning_rate": 9.093997114260658e-06, "loss": 0.4853, "step": 7161 }, { "epoch": 1.5975908989515948, "grad_norm": 0.18114502727985382, "learning_rate": 9.09165338902307e-06, "loss": 0.4688, "step": 7162 }, { "epoch": 1.5978139638634843, "grad_norm": 0.18122133612632751, "learning_rate": 9.089309714095432e-06, "loss": 0.4679, "step": 7163 }, { "epoch": 1.5980370287753738, "grad_norm": 0.18571965396404266, "learning_rate": 9.086966089607551e-06, "loss": 0.48, "step": 7164 }, { "epoch": 1.598260093687263, "grad_norm": 0.1749122440814972, "learning_rate": 9.084622515689228e-06, "loss": 0.4706, "step": 7165 }, { "epoch": 1.5984831585991524, "grad_norm": 0.17465496063232422, "learning_rate": 9.082278992470272e-06, "loss": 0.4586, "step": 7166 }, { "epoch": 1.5987062235110416, "grad_norm": 0.1802026480436325, "learning_rate": 9.079935520080474e-06, "loss": 0.4348, "step": 7167 }, { "epoch": 1.598929288422931, "grad_norm": 0.17339572310447693, "learning_rate": 9.077592098649639e-06, "loss": 0.4688, "step": 7168 }, { "epoch": 1.5991523533348204, "grad_norm": 0.17421960830688477, "learning_rate": 9.075248728307551e-06, "loss": 0.4779, "step": 7169 }, { "epoch": 1.59937541824671, "grad_norm": 0.17133331298828125, "learning_rate": 9.07290540918401e-06, "loss": 0.451, "step": 7170 }, { "epoch": 1.5995984831585992, "grad_norm": 0.1657906174659729, "learning_rate": 9.070562141408795e-06, "loss": 0.4296, "step": 7171 }, { "epoch": 1.5998215480704885, "grad_norm": 0.17014503479003906, "learning_rate": 9.068218925111695e-06, "loss": 0.4302, "step": 7172 }, { "epoch": 1.6000446129823778, "grad_norm": 0.1789960116147995, "learning_rate": 9.065875760422496e-06, "loss": 0.4808, "step": 7173 }, { "epoch": 1.6002676778942673, "grad_norm": 0.16898800432682037, "learning_rate": 9.06353264747097e-06, "loss": 0.4614, "step": 7174 }, { "epoch": 1.6004907428061566, "grad_norm": 0.1819867193698883, "learning_rate": 9.0611895863869e-06, "loss": 0.4687, "step": 7175 }, { "epoch": 1.600713807718046, "grad_norm": 0.17777277529239655, "learning_rate": 9.058846577300052e-06, "loss": 0.4452, "step": 7176 }, { "epoch": 1.6009368726299353, "grad_norm": 0.1771070510149002, "learning_rate": 9.056503620340204e-06, "loss": 0.4647, "step": 7177 }, { "epoch": 1.6011599375418246, "grad_norm": 0.17987768352031708, "learning_rate": 9.054160715637117e-06, "loss": 0.465, "step": 7178 }, { "epoch": 1.601383002453714, "grad_norm": 0.17189104855060577, "learning_rate": 9.051817863320563e-06, "loss": 0.46, "step": 7179 }, { "epoch": 1.6016060673656034, "grad_norm": 0.19388031959533691, "learning_rate": 9.049475063520295e-06, "loss": 0.4347, "step": 7180 }, { "epoch": 1.601829132277493, "grad_norm": 0.1758044809103012, "learning_rate": 9.047132316366082e-06, "loss": 0.4477, "step": 7181 }, { "epoch": 1.6020521971893822, "grad_norm": 0.19193986058235168, "learning_rate": 9.044789621987672e-06, "loss": 0.4698, "step": 7182 }, { "epoch": 1.6022752621012715, "grad_norm": 0.1996065378189087, "learning_rate": 9.042446980514822e-06, "loss": 0.4733, "step": 7183 }, { "epoch": 1.6024983270131608, "grad_norm": 0.1759694218635559, "learning_rate": 9.040104392077283e-06, "loss": 0.4534, "step": 7184 }, { "epoch": 1.60272139192505, "grad_norm": 0.1798923909664154, "learning_rate": 9.0377618568048e-06, "loss": 0.4792, "step": 7185 }, { "epoch": 1.6029444568369395, "grad_norm": 0.17134787142276764, "learning_rate": 9.035419374827116e-06, "loss": 0.4889, "step": 7186 }, { "epoch": 1.603167521748829, "grad_norm": 0.2020411342382431, "learning_rate": 9.033076946273978e-06, "loss": 0.4891, "step": 7187 }, { "epoch": 1.6033905866607183, "grad_norm": 0.1749039590358734, "learning_rate": 9.030734571275118e-06, "loss": 0.4535, "step": 7188 }, { "epoch": 1.6036136515726076, "grad_norm": 0.1836901307106018, "learning_rate": 9.028392249960276e-06, "loss": 0.4793, "step": 7189 }, { "epoch": 1.6038367164844969, "grad_norm": 0.19300808012485504, "learning_rate": 9.02604998245918e-06, "loss": 0.4434, "step": 7190 }, { "epoch": 1.6040597813963864, "grad_norm": 0.18210625648498535, "learning_rate": 9.023707768901567e-06, "loss": 0.4591, "step": 7191 }, { "epoch": 1.6042828463082757, "grad_norm": 0.17498520016670227, "learning_rate": 9.021365609417155e-06, "loss": 0.4658, "step": 7192 }, { "epoch": 1.6045059112201652, "grad_norm": 0.17925825715065002, "learning_rate": 9.019023504135671e-06, "loss": 0.4617, "step": 7193 }, { "epoch": 1.6047289761320545, "grad_norm": 0.18746887147426605, "learning_rate": 9.01668145318684e-06, "loss": 0.4819, "step": 7194 }, { "epoch": 1.6049520410439437, "grad_norm": 0.16884252429008484, "learning_rate": 9.014339456700371e-06, "loss": 0.4457, "step": 7195 }, { "epoch": 1.605175105955833, "grad_norm": 0.1717928946018219, "learning_rate": 9.011997514805986e-06, "loss": 0.4507, "step": 7196 }, { "epoch": 1.6053981708677225, "grad_norm": 0.18760330975055695, "learning_rate": 9.009655627633392e-06, "loss": 0.4447, "step": 7197 }, { "epoch": 1.605621235779612, "grad_norm": 0.16959816217422485, "learning_rate": 9.0073137953123e-06, "loss": 0.4185, "step": 7198 }, { "epoch": 1.6058443006915013, "grad_norm": 0.17206130921840668, "learning_rate": 9.004972017972414e-06, "loss": 0.4572, "step": 7199 }, { "epoch": 1.6060673656033906, "grad_norm": 0.1713947355747223, "learning_rate": 9.002630295743437e-06, "loss": 0.4656, "step": 7200 }, { "epoch": 1.6062904305152799, "grad_norm": 0.1734878569841385, "learning_rate": 9.000288628755065e-06, "loss": 0.4543, "step": 7201 }, { "epoch": 1.6065134954271691, "grad_norm": 0.17771008610725403, "learning_rate": 8.997947017137e-06, "loss": 0.4749, "step": 7202 }, { "epoch": 1.6067365603390587, "grad_norm": 0.17664694786071777, "learning_rate": 8.995605461018933e-06, "loss": 0.4472, "step": 7203 }, { "epoch": 1.6069596252509482, "grad_norm": 0.18092039227485657, "learning_rate": 8.993263960530552e-06, "loss": 0.4716, "step": 7204 }, { "epoch": 1.6071826901628374, "grad_norm": 0.17921680212020874, "learning_rate": 8.990922515801546e-06, "loss": 0.4736, "step": 7205 }, { "epoch": 1.6074057550747267, "grad_norm": 0.17247629165649414, "learning_rate": 8.9885811269616e-06, "loss": 0.4575, "step": 7206 }, { "epoch": 1.607628819986616, "grad_norm": 0.1867935061454773, "learning_rate": 8.986239794140389e-06, "loss": 0.4509, "step": 7207 }, { "epoch": 1.6078518848985055, "grad_norm": 0.18020030856132507, "learning_rate": 8.983898517467598e-06, "loss": 0.4526, "step": 7208 }, { "epoch": 1.6080749498103948, "grad_norm": 0.17290444672107697, "learning_rate": 8.981557297072897e-06, "loss": 0.4358, "step": 7209 }, { "epoch": 1.6082980147222843, "grad_norm": 0.17369163036346436, "learning_rate": 8.979216133085961e-06, "loss": 0.4485, "step": 7210 }, { "epoch": 1.6085210796341736, "grad_norm": 0.17620067298412323, "learning_rate": 8.976875025636455e-06, "loss": 0.4515, "step": 7211 }, { "epoch": 1.6087441445460628, "grad_norm": 0.17683733999729156, "learning_rate": 8.974533974854046e-06, "loss": 0.4434, "step": 7212 }, { "epoch": 1.6089672094579521, "grad_norm": 0.16844935715198517, "learning_rate": 8.972192980868397e-06, "loss": 0.4471, "step": 7213 }, { "epoch": 1.6091902743698416, "grad_norm": 0.17559292912483215, "learning_rate": 8.969852043809165e-06, "loss": 0.4755, "step": 7214 }, { "epoch": 1.6094133392817311, "grad_norm": 0.17093756794929504, "learning_rate": 8.967511163806008e-06, "loss": 0.4571, "step": 7215 }, { "epoch": 1.6096364041936204, "grad_norm": 0.17171628773212433, "learning_rate": 8.965170340988574e-06, "loss": 0.4461, "step": 7216 }, { "epoch": 1.6098594691055097, "grad_norm": 0.17218218743801117, "learning_rate": 8.96282957548652e-06, "loss": 0.4595, "step": 7217 }, { "epoch": 1.610082534017399, "grad_norm": 0.17221598327159882, "learning_rate": 8.960488867429486e-06, "loss": 0.4469, "step": 7218 }, { "epoch": 1.6103055989292883, "grad_norm": 0.19169965386390686, "learning_rate": 8.958148216947118e-06, "loss": 0.4863, "step": 7219 }, { "epoch": 1.6105286638411778, "grad_norm": 0.19341188669204712, "learning_rate": 8.955807624169054e-06, "loss": 0.482, "step": 7220 }, { "epoch": 1.6107517287530673, "grad_norm": 0.17554320394992828, "learning_rate": 8.953467089224934e-06, "loss": 0.4396, "step": 7221 }, { "epoch": 1.6109747936649566, "grad_norm": 0.17117850482463837, "learning_rate": 8.951126612244387e-06, "loss": 0.4319, "step": 7222 }, { "epoch": 1.6111978585768458, "grad_norm": 0.17589138448238373, "learning_rate": 8.948786193357049e-06, "loss": 0.4553, "step": 7223 }, { "epoch": 1.6114209234887351, "grad_norm": 0.265455961227417, "learning_rate": 8.946445832692545e-06, "loss": 0.481, "step": 7224 }, { "epoch": 1.6116439884006246, "grad_norm": 0.17754699289798737, "learning_rate": 8.944105530380495e-06, "loss": 0.4489, "step": 7225 }, { "epoch": 1.611867053312514, "grad_norm": 0.19480863213539124, "learning_rate": 8.941765286550523e-06, "loss": 0.4515, "step": 7226 }, { "epoch": 1.6120901182244034, "grad_norm": 0.16930724680423737, "learning_rate": 8.939425101332245e-06, "loss": 0.4376, "step": 7227 }, { "epoch": 1.6123131831362927, "grad_norm": 0.1783130019903183, "learning_rate": 8.937084974855278e-06, "loss": 0.4679, "step": 7228 }, { "epoch": 1.612536248048182, "grad_norm": 0.19291211664676666, "learning_rate": 8.934744907249229e-06, "loss": 0.481, "step": 7229 }, { "epoch": 1.6127593129600712, "grad_norm": 0.1795196533203125, "learning_rate": 8.93240489864371e-06, "loss": 0.4683, "step": 7230 }, { "epoch": 1.6129823778719607, "grad_norm": 0.17331638932228088, "learning_rate": 8.930064949168322e-06, "loss": 0.4553, "step": 7231 }, { "epoch": 1.6132054427838503, "grad_norm": 0.15906356275081635, "learning_rate": 8.927725058952669e-06, "loss": 0.4416, "step": 7232 }, { "epoch": 1.6134285076957395, "grad_norm": 0.17841282486915588, "learning_rate": 8.925385228126344e-06, "loss": 0.4671, "step": 7233 }, { "epoch": 1.6136515726076288, "grad_norm": 0.17484787106513977, "learning_rate": 8.923045456818947e-06, "loss": 0.4748, "step": 7234 }, { "epoch": 1.613874637519518, "grad_norm": 0.17049816250801086, "learning_rate": 8.920705745160064e-06, "loss": 0.4464, "step": 7235 }, { "epoch": 1.6140977024314074, "grad_norm": 0.17688851058483124, "learning_rate": 8.918366093279288e-06, "loss": 0.4538, "step": 7236 }, { "epoch": 1.6143207673432969, "grad_norm": 0.19764314591884613, "learning_rate": 8.916026501306203e-06, "loss": 0.464, "step": 7237 }, { "epoch": 1.6145438322551864, "grad_norm": 0.16911235451698303, "learning_rate": 8.913686969370386e-06, "loss": 0.4505, "step": 7238 }, { "epoch": 1.6147668971670757, "grad_norm": 0.1755746304988861, "learning_rate": 8.911347497601418e-06, "loss": 0.4726, "step": 7239 }, { "epoch": 1.614989962078965, "grad_norm": 0.17547662556171417, "learning_rate": 8.909008086128874e-06, "loss": 0.4596, "step": 7240 }, { "epoch": 1.6152130269908542, "grad_norm": 0.18209370970726013, "learning_rate": 8.906668735082327e-06, "loss": 0.4692, "step": 7241 }, { "epoch": 1.6154360919027437, "grad_norm": 0.17402005195617676, "learning_rate": 8.90432944459134e-06, "loss": 0.4477, "step": 7242 }, { "epoch": 1.615659156814633, "grad_norm": 0.1721334308385849, "learning_rate": 8.901990214785483e-06, "loss": 0.4463, "step": 7243 }, { "epoch": 1.6158822217265225, "grad_norm": 0.17670831084251404, "learning_rate": 8.899651045794313e-06, "loss": 0.4583, "step": 7244 }, { "epoch": 1.6161052866384118, "grad_norm": 0.1660810261964798, "learning_rate": 8.89731193774739e-06, "loss": 0.4553, "step": 7245 }, { "epoch": 1.616328351550301, "grad_norm": 0.16802763938903809, "learning_rate": 8.894972890774266e-06, "loss": 0.4426, "step": 7246 }, { "epoch": 1.6165514164621904, "grad_norm": 0.18203771114349365, "learning_rate": 8.892633905004498e-06, "loss": 0.4281, "step": 7247 }, { "epoch": 1.6167744813740799, "grad_norm": 0.17793646454811096, "learning_rate": 8.890294980567626e-06, "loss": 0.4463, "step": 7248 }, { "epoch": 1.6169975462859694, "grad_norm": 0.1768350899219513, "learning_rate": 8.887956117593201e-06, "loss": 0.4461, "step": 7249 }, { "epoch": 1.6172206111978586, "grad_norm": 0.16968591511249542, "learning_rate": 8.88561731621076e-06, "loss": 0.4256, "step": 7250 }, { "epoch": 1.617443676109748, "grad_norm": 0.17799051105976105, "learning_rate": 8.883278576549843e-06, "loss": 0.4425, "step": 7251 }, { "epoch": 1.6176667410216372, "grad_norm": 0.1699870377779007, "learning_rate": 8.88093989873998e-06, "loss": 0.4579, "step": 7252 }, { "epoch": 1.6178898059335265, "grad_norm": 0.17788825929164886, "learning_rate": 8.87860128291071e-06, "loss": 0.4816, "step": 7253 }, { "epoch": 1.618112870845416, "grad_norm": 0.1790701150894165, "learning_rate": 8.87626272919155e-06, "loss": 0.4347, "step": 7254 }, { "epoch": 1.6183359357573055, "grad_norm": 0.1901506632566452, "learning_rate": 8.87392423771203e-06, "loss": 0.4522, "step": 7255 }, { "epoch": 1.6185590006691948, "grad_norm": 0.19295074045658112, "learning_rate": 8.871585808601669e-06, "loss": 0.4651, "step": 7256 }, { "epoch": 1.618782065581084, "grad_norm": 0.1766098290681839, "learning_rate": 8.869247441989983e-06, "loss": 0.4137, "step": 7257 }, { "epoch": 1.6190051304929733, "grad_norm": 0.17081737518310547, "learning_rate": 8.866909138006488e-06, "loss": 0.4277, "step": 7258 }, { "epoch": 1.6192281954048628, "grad_norm": 0.17560650408267975, "learning_rate": 8.864570896780691e-06, "loss": 0.4675, "step": 7259 }, { "epoch": 1.6194512603167521, "grad_norm": 0.1765887439250946, "learning_rate": 8.862232718442101e-06, "loss": 0.4536, "step": 7260 }, { "epoch": 1.6196743252286416, "grad_norm": 0.18248333036899567, "learning_rate": 8.85989460312022e-06, "loss": 0.4778, "step": 7261 }, { "epoch": 1.619897390140531, "grad_norm": 0.17278814315795898, "learning_rate": 8.857556550944548e-06, "loss": 0.4378, "step": 7262 }, { "epoch": 1.6201204550524202, "grad_norm": 0.18780216574668884, "learning_rate": 8.855218562044579e-06, "loss": 0.4768, "step": 7263 }, { "epoch": 1.6203435199643095, "grad_norm": 0.16610752046108246, "learning_rate": 8.852880636549808e-06, "loss": 0.4362, "step": 7264 }, { "epoch": 1.620566584876199, "grad_norm": 0.16919295489788055, "learning_rate": 8.85054277458972e-06, "loss": 0.4394, "step": 7265 }, { "epoch": 1.6207896497880885, "grad_norm": 0.16546227037906647, "learning_rate": 8.848204976293807e-06, "loss": 0.4484, "step": 7266 }, { "epoch": 1.6210127146999778, "grad_norm": 0.1768079698085785, "learning_rate": 8.845867241791548e-06, "loss": 0.445, "step": 7267 }, { "epoch": 1.621235779611867, "grad_norm": 0.17911723256111145, "learning_rate": 8.84352957121242e-06, "loss": 0.4643, "step": 7268 }, { "epoch": 1.6214588445237563, "grad_norm": 0.17884330451488495, "learning_rate": 8.841191964685896e-06, "loss": 0.4687, "step": 7269 }, { "epoch": 1.6216819094356456, "grad_norm": 0.17899781465530396, "learning_rate": 8.838854422341454e-06, "loss": 0.4641, "step": 7270 }, { "epoch": 1.6219049743475351, "grad_norm": 0.17130422592163086, "learning_rate": 8.836516944308555e-06, "loss": 0.4413, "step": 7271 }, { "epoch": 1.6221280392594246, "grad_norm": 0.17263583838939667, "learning_rate": 8.834179530716669e-06, "loss": 0.4521, "step": 7272 }, { "epoch": 1.622351104171314, "grad_norm": 0.17574861645698547, "learning_rate": 8.831842181695251e-06, "loss": 0.4695, "step": 7273 }, { "epoch": 1.6225741690832032, "grad_norm": 0.1733783781528473, "learning_rate": 8.829504897373764e-06, "loss": 0.4458, "step": 7274 }, { "epoch": 1.6227972339950925, "grad_norm": 0.18253105878829956, "learning_rate": 8.827167677881656e-06, "loss": 0.4412, "step": 7275 }, { "epoch": 1.623020298906982, "grad_norm": 0.1744835376739502, "learning_rate": 8.824830523348383e-06, "loss": 0.441, "step": 7276 }, { "epoch": 1.6232433638188712, "grad_norm": 0.17317108809947968, "learning_rate": 8.822493433903383e-06, "loss": 0.4819, "step": 7277 }, { "epoch": 1.6234664287307607, "grad_norm": 0.17538447678089142, "learning_rate": 8.820156409676105e-06, "loss": 0.4303, "step": 7278 }, { "epoch": 1.62368949364265, "grad_norm": 0.18321259319782257, "learning_rate": 8.817819450795989e-06, "loss": 0.4616, "step": 7279 }, { "epoch": 1.6239125585545393, "grad_norm": 0.17948691546916962, "learning_rate": 8.815482557392463e-06, "loss": 0.4734, "step": 7280 }, { "epoch": 1.6241356234664286, "grad_norm": 0.17915484309196472, "learning_rate": 8.81314572959497e-06, "loss": 0.4497, "step": 7281 }, { "epoch": 1.624358688378318, "grad_norm": 0.18481531739234924, "learning_rate": 8.810808967532927e-06, "loss": 0.4729, "step": 7282 }, { "epoch": 1.6245817532902076, "grad_norm": 0.18924424052238464, "learning_rate": 8.808472271335767e-06, "loss": 0.4762, "step": 7283 }, { "epoch": 1.6248048182020969, "grad_norm": 0.1721714437007904, "learning_rate": 8.806135641132907e-06, "loss": 0.4682, "step": 7284 }, { "epoch": 1.6250278831139862, "grad_norm": 0.1784234195947647, "learning_rate": 8.803799077053765e-06, "loss": 0.465, "step": 7285 }, { "epoch": 1.6252509480258754, "grad_norm": 0.18713349103927612, "learning_rate": 8.801462579227751e-06, "loss": 0.4549, "step": 7286 }, { "epoch": 1.625474012937765, "grad_norm": 0.1756950169801712, "learning_rate": 8.799126147784284e-06, "loss": 0.4389, "step": 7287 }, { "epoch": 1.6256970778496542, "grad_norm": 0.16715659201145172, "learning_rate": 8.796789782852761e-06, "loss": 0.4456, "step": 7288 }, { "epoch": 1.6259201427615437, "grad_norm": 0.1801193356513977, "learning_rate": 8.79445348456259e-06, "loss": 0.4601, "step": 7289 }, { "epoch": 1.626143207673433, "grad_norm": 0.18265050649642944, "learning_rate": 8.792117253043166e-06, "loss": 0.4617, "step": 7290 }, { "epoch": 1.6263662725853223, "grad_norm": 0.17601865530014038, "learning_rate": 8.78978108842389e-06, "loss": 0.4572, "step": 7291 }, { "epoch": 1.6265893374972116, "grad_norm": 0.1800207793712616, "learning_rate": 8.787444990834146e-06, "loss": 0.4626, "step": 7292 }, { "epoch": 1.626812402409101, "grad_norm": 0.18082986772060394, "learning_rate": 8.78510896040333e-06, "loss": 0.4583, "step": 7293 }, { "epoch": 1.6270354673209904, "grad_norm": 0.1825123131275177, "learning_rate": 8.782772997260819e-06, "loss": 0.4604, "step": 7294 }, { "epoch": 1.6272585322328799, "grad_norm": 0.17616558074951172, "learning_rate": 8.780437101535997e-06, "loss": 0.442, "step": 7295 }, { "epoch": 1.6274815971447691, "grad_norm": 0.17222437262535095, "learning_rate": 8.778101273358238e-06, "loss": 0.4374, "step": 7296 }, { "epoch": 1.6277046620566584, "grad_norm": 0.1752903312444687, "learning_rate": 8.775765512856919e-06, "loss": 0.4703, "step": 7297 }, { "epoch": 1.6279277269685477, "grad_norm": 0.17624272406101227, "learning_rate": 8.773429820161404e-06, "loss": 0.4692, "step": 7298 }, { "epoch": 1.6281507918804372, "grad_norm": 0.17555177211761475, "learning_rate": 8.771094195401062e-06, "loss": 0.4517, "step": 7299 }, { "epoch": 1.6283738567923267, "grad_norm": 0.1727064996957779, "learning_rate": 8.768758638705253e-06, "loss": 0.4617, "step": 7300 }, { "epoch": 1.628596921704216, "grad_norm": 0.17664147913455963, "learning_rate": 8.766423150203335e-06, "loss": 0.4565, "step": 7301 }, { "epoch": 1.6288199866161053, "grad_norm": 0.17871947586536407, "learning_rate": 8.764087730024667e-06, "loss": 0.464, "step": 7302 }, { "epoch": 1.6290430515279946, "grad_norm": 0.18316639959812164, "learning_rate": 8.761752378298589e-06, "loss": 0.4474, "step": 7303 }, { "epoch": 1.629266116439884, "grad_norm": 0.19442254304885864, "learning_rate": 8.759417095154456e-06, "loss": 0.4353, "step": 7304 }, { "epoch": 1.6294891813517733, "grad_norm": 0.17314781248569489, "learning_rate": 8.757081880721612e-06, "loss": 0.4544, "step": 7305 }, { "epoch": 1.6297122462636628, "grad_norm": 0.1879189908504486, "learning_rate": 8.754746735129385e-06, "loss": 0.4588, "step": 7306 }, { "epoch": 1.6299353111755521, "grad_norm": 0.17803287506103516, "learning_rate": 8.752411658507121e-06, "loss": 0.4526, "step": 7307 }, { "epoch": 1.6301583760874414, "grad_norm": 0.18903419375419617, "learning_rate": 8.750076650984143e-06, "loss": 0.4412, "step": 7308 }, { "epoch": 1.6303814409993307, "grad_norm": 0.17319492995738983, "learning_rate": 8.747741712689786e-06, "loss": 0.4506, "step": 7309 }, { "epoch": 1.6306045059112202, "grad_norm": 0.1812223494052887, "learning_rate": 8.745406843753369e-06, "loss": 0.4256, "step": 7310 }, { "epoch": 1.6308275708231095, "grad_norm": 0.1832951158285141, "learning_rate": 8.743072044304212e-06, "loss": 0.4721, "step": 7311 }, { "epoch": 1.631050635734999, "grad_norm": 0.183165043592453, "learning_rate": 8.74073731447163e-06, "loss": 0.4675, "step": 7312 }, { "epoch": 1.6312737006468883, "grad_norm": 0.18091800808906555, "learning_rate": 8.738402654384939e-06, "loss": 0.4438, "step": 7313 }, { "epoch": 1.6314967655587775, "grad_norm": 0.1746785044670105, "learning_rate": 8.736068064173444e-06, "loss": 0.4539, "step": 7314 }, { "epoch": 1.6317198304706668, "grad_norm": 0.18262261152267456, "learning_rate": 8.733733543966449e-06, "loss": 0.4514, "step": 7315 }, { "epoch": 1.6319428953825563, "grad_norm": 0.18014779686927795, "learning_rate": 8.731399093893256e-06, "loss": 0.4688, "step": 7316 }, { "epoch": 1.6321659602944458, "grad_norm": 0.1781376600265503, "learning_rate": 8.729064714083163e-06, "loss": 0.4552, "step": 7317 }, { "epoch": 1.632389025206335, "grad_norm": 0.17916658520698547, "learning_rate": 8.726730404665458e-06, "loss": 0.4463, "step": 7318 }, { "epoch": 1.6326120901182244, "grad_norm": 0.17721021175384521, "learning_rate": 8.724396165769435e-06, "loss": 0.4552, "step": 7319 }, { "epoch": 1.6328351550301137, "grad_norm": 0.18291765451431274, "learning_rate": 8.722061997524374e-06, "loss": 0.4492, "step": 7320 }, { "epoch": 1.6330582199420032, "grad_norm": 0.19074088335037231, "learning_rate": 8.719727900059559e-06, "loss": 0.4784, "step": 7321 }, { "epoch": 1.6332812848538925, "grad_norm": 0.18184469640254974, "learning_rate": 8.717393873504265e-06, "loss": 0.4602, "step": 7322 }, { "epoch": 1.633504349765782, "grad_norm": 0.17274357378482819, "learning_rate": 8.715059917987766e-06, "loss": 0.4394, "step": 7323 }, { "epoch": 1.6337274146776712, "grad_norm": 0.1714925616979599, "learning_rate": 8.712726033639334e-06, "loss": 0.4277, "step": 7324 }, { "epoch": 1.6339504795895605, "grad_norm": 0.17422372102737427, "learning_rate": 8.710392220588229e-06, "loss": 0.4585, "step": 7325 }, { "epoch": 1.6341735445014498, "grad_norm": 0.17235322296619415, "learning_rate": 8.708058478963717e-06, "loss": 0.4489, "step": 7326 }, { "epoch": 1.6343966094133393, "grad_norm": 0.17422306537628174, "learning_rate": 8.70572480889505e-06, "loss": 0.4624, "step": 7327 }, { "epoch": 1.6346196743252286, "grad_norm": 0.1853763461112976, "learning_rate": 8.703391210511486e-06, "loss": 0.4723, "step": 7328 }, { "epoch": 1.634842739237118, "grad_norm": 0.17920438945293427, "learning_rate": 8.701057683942274e-06, "loss": 0.4596, "step": 7329 }, { "epoch": 1.6350658041490074, "grad_norm": 0.17522959411144257, "learning_rate": 8.698724229316658e-06, "loss": 0.4501, "step": 7330 }, { "epoch": 1.6352888690608967, "grad_norm": 0.17898008227348328, "learning_rate": 8.696390846763877e-06, "loss": 0.4416, "step": 7331 }, { "epoch": 1.635511933972786, "grad_norm": 0.1786876916885376, "learning_rate": 8.694057536413175e-06, "loss": 0.4541, "step": 7332 }, { "epoch": 1.6357349988846754, "grad_norm": 0.18402035534381866, "learning_rate": 8.691724298393777e-06, "loss": 0.4781, "step": 7333 }, { "epoch": 1.635958063796565, "grad_norm": 0.18904945254325867, "learning_rate": 8.68939113283492e-06, "loss": 0.4541, "step": 7334 }, { "epoch": 1.6361811287084542, "grad_norm": 0.18246184289455414, "learning_rate": 8.687058039865823e-06, "loss": 0.4711, "step": 7335 }, { "epoch": 1.6364041936203435, "grad_norm": 0.1694568693637848, "learning_rate": 8.684725019615714e-06, "loss": 0.4648, "step": 7336 }, { "epoch": 1.6366272585322328, "grad_norm": 0.17736688256263733, "learning_rate": 8.682392072213804e-06, "loss": 0.4192, "step": 7337 }, { "epoch": 1.6368503234441223, "grad_norm": 0.16740672290325165, "learning_rate": 8.680059197789311e-06, "loss": 0.4591, "step": 7338 }, { "epoch": 1.6370733883560116, "grad_norm": 0.17872267961502075, "learning_rate": 8.67772639647144e-06, "loss": 0.4453, "step": 7339 }, { "epoch": 1.637296453267901, "grad_norm": 0.18077607452869415, "learning_rate": 8.675393668389402e-06, "loss": 0.459, "step": 7340 }, { "epoch": 1.6375195181797904, "grad_norm": 0.17609618604183197, "learning_rate": 8.67306101367239e-06, "loss": 0.4774, "step": 7341 }, { "epoch": 1.6377425830916796, "grad_norm": 0.17567932605743408, "learning_rate": 8.670728432449608e-06, "loss": 0.4577, "step": 7342 }, { "epoch": 1.637965648003569, "grad_norm": 0.18669098615646362, "learning_rate": 8.66839592485025e-06, "loss": 0.4321, "step": 7343 }, { "epoch": 1.6381887129154584, "grad_norm": 0.1745385229587555, "learning_rate": 8.666063491003499e-06, "loss": 0.4595, "step": 7344 }, { "epoch": 1.6384117778273477, "grad_norm": 0.16913901269435883, "learning_rate": 8.663731131038544e-06, "loss": 0.4179, "step": 7345 }, { "epoch": 1.6386348427392372, "grad_norm": 0.16662278771400452, "learning_rate": 8.661398845084562e-06, "loss": 0.4333, "step": 7346 }, { "epoch": 1.6388579076511265, "grad_norm": 0.17218153178691864, "learning_rate": 8.659066633270736e-06, "loss": 0.4324, "step": 7347 }, { "epoch": 1.6390809725630158, "grad_norm": 0.1764010488986969, "learning_rate": 8.65673449572623e-06, "loss": 0.44, "step": 7348 }, { "epoch": 1.639304037474905, "grad_norm": 0.17202350497245789, "learning_rate": 8.65440243258022e-06, "loss": 0.4611, "step": 7349 }, { "epoch": 1.6395271023867946, "grad_norm": 0.17550015449523926, "learning_rate": 8.652070443961866e-06, "loss": 0.4641, "step": 7350 }, { "epoch": 1.639750167298684, "grad_norm": 0.18475106358528137, "learning_rate": 8.649738530000333e-06, "loss": 0.4537, "step": 7351 }, { "epoch": 1.6399732322105733, "grad_norm": 0.1760861724615097, "learning_rate": 8.647406690824769e-06, "loss": 0.4216, "step": 7352 }, { "epoch": 1.6401962971224626, "grad_norm": 0.20794068276882172, "learning_rate": 8.645074926564334e-06, "loss": 0.4492, "step": 7353 }, { "epoch": 1.640419362034352, "grad_norm": 0.1916939616203308, "learning_rate": 8.64274323734817e-06, "loss": 0.4492, "step": 7354 }, { "epoch": 1.6406424269462414, "grad_norm": 0.17501096427440643, "learning_rate": 8.640411623305425e-06, "loss": 0.4365, "step": 7355 }, { "epoch": 1.6408654918581307, "grad_norm": 0.1728835254907608, "learning_rate": 8.638080084565235e-06, "loss": 0.444, "step": 7356 }, { "epoch": 1.6410885567700202, "grad_norm": 0.17348189651966095, "learning_rate": 8.63574862125674e-06, "loss": 0.4614, "step": 7357 }, { "epoch": 1.6413116216819095, "grad_norm": 0.17011727392673492, "learning_rate": 8.633417233509063e-06, "loss": 0.4539, "step": 7358 }, { "epoch": 1.6415346865937988, "grad_norm": 0.17248336970806122, "learning_rate": 8.63108592145134e-06, "loss": 0.4545, "step": 7359 }, { "epoch": 1.641757751505688, "grad_norm": 0.17574246227741241, "learning_rate": 8.628754685212685e-06, "loss": 0.458, "step": 7360 }, { "epoch": 1.6419808164175775, "grad_norm": 0.19359445571899414, "learning_rate": 8.626423524922224e-06, "loss": 0.4388, "step": 7361 }, { "epoch": 1.642203881329467, "grad_norm": 0.17245686054229736, "learning_rate": 8.624092440709066e-06, "loss": 0.4345, "step": 7362 }, { "epoch": 1.6424269462413563, "grad_norm": 0.180564284324646, "learning_rate": 8.621761432702325e-06, "loss": 0.4665, "step": 7363 }, { "epoch": 1.6426500111532456, "grad_norm": 0.18473738431930542, "learning_rate": 8.619430501031106e-06, "loss": 0.4364, "step": 7364 }, { "epoch": 1.6428730760651349, "grad_norm": 0.18806539475917816, "learning_rate": 8.617099645824509e-06, "loss": 0.4498, "step": 7365 }, { "epoch": 1.6430961409770242, "grad_norm": 0.1734282672405243, "learning_rate": 8.614768867211634e-06, "loss": 0.4782, "step": 7366 }, { "epoch": 1.6433192058889137, "grad_norm": 0.183370441198349, "learning_rate": 8.612438165321571e-06, "loss": 0.4892, "step": 7367 }, { "epoch": 1.6435422708008032, "grad_norm": 0.1682674139738083, "learning_rate": 8.610107540283411e-06, "loss": 0.4256, "step": 7368 }, { "epoch": 1.6437653357126925, "grad_norm": 0.17661243677139282, "learning_rate": 8.607776992226238e-06, "loss": 0.4693, "step": 7369 }, { "epoch": 1.6439884006245817, "grad_norm": 0.2739085257053375, "learning_rate": 8.605446521279132e-06, "loss": 0.461, "step": 7370 }, { "epoch": 1.644211465536471, "grad_norm": 0.1764136105775833, "learning_rate": 8.60311612757117e-06, "loss": 0.4833, "step": 7371 }, { "epoch": 1.6444345304483605, "grad_norm": 0.17037895321846008, "learning_rate": 8.600785811231426e-06, "loss": 0.474, "step": 7372 }, { "epoch": 1.6446575953602498, "grad_norm": 0.17914576828479767, "learning_rate": 8.598455572388961e-06, "loss": 0.4404, "step": 7373 }, { "epoch": 1.6448806602721393, "grad_norm": 0.1834004670381546, "learning_rate": 8.596125411172846e-06, "loss": 0.4738, "step": 7374 }, { "epoch": 1.6451037251840286, "grad_norm": 0.542524516582489, "learning_rate": 8.593795327712135e-06, "loss": 0.4519, "step": 7375 }, { "epoch": 1.6453267900959179, "grad_norm": 0.17706745862960815, "learning_rate": 8.591465322135886e-06, "loss": 0.47, "step": 7376 }, { "epoch": 1.6455498550078071, "grad_norm": 0.18335482478141785, "learning_rate": 8.589135394573146e-06, "loss": 0.4763, "step": 7377 }, { "epoch": 1.6457729199196967, "grad_norm": 0.19544199109077454, "learning_rate": 8.586805545152962e-06, "loss": 0.4459, "step": 7378 }, { "epoch": 1.6459959848315862, "grad_norm": 0.19434663653373718, "learning_rate": 8.584475774004374e-06, "loss": 0.4611, "step": 7379 }, { "epoch": 1.6462190497434754, "grad_norm": 0.17542743682861328, "learning_rate": 8.582146081256428e-06, "loss": 0.4435, "step": 7380 }, { "epoch": 1.6464421146553647, "grad_norm": 0.17821446061134338, "learning_rate": 8.579816467038144e-06, "loss": 0.4674, "step": 7381 }, { "epoch": 1.646665179567254, "grad_norm": 0.16441945731639862, "learning_rate": 8.577486931478563e-06, "loss": 0.4263, "step": 7382 }, { "epoch": 1.6468882444791433, "grad_norm": 0.17425528168678284, "learning_rate": 8.5751574747067e-06, "loss": 0.4491, "step": 7383 }, { "epoch": 1.6471113093910328, "grad_norm": 0.17746862769126892, "learning_rate": 8.572828096851577e-06, "loss": 0.4633, "step": 7384 }, { "epoch": 1.6473343743029223, "grad_norm": 0.16490298509597778, "learning_rate": 8.570498798042217e-06, "loss": 0.4116, "step": 7385 }, { "epoch": 1.6475574392148116, "grad_norm": 0.17692579329013824, "learning_rate": 8.568169578407624e-06, "loss": 0.4585, "step": 7386 }, { "epoch": 1.6477805041267009, "grad_norm": 0.18186244368553162, "learning_rate": 8.565840438076805e-06, "loss": 0.4792, "step": 7387 }, { "epoch": 1.6480035690385901, "grad_norm": 0.16964338719844818, "learning_rate": 8.563511377178764e-06, "loss": 0.4254, "step": 7388 }, { "epoch": 1.6482266339504796, "grad_norm": 0.17917568981647491, "learning_rate": 8.561182395842497e-06, "loss": 0.4823, "step": 7389 }, { "epoch": 1.648449698862369, "grad_norm": 0.1826728731393814, "learning_rate": 8.558853494197e-06, "loss": 0.4297, "step": 7390 }, { "epoch": 1.6486727637742584, "grad_norm": 0.17334018647670746, "learning_rate": 8.556524672371263e-06, "loss": 0.4342, "step": 7391 }, { "epoch": 1.6488958286861477, "grad_norm": 0.17298614978790283, "learning_rate": 8.55419593049427e-06, "loss": 0.4605, "step": 7392 }, { "epoch": 1.649118893598037, "grad_norm": 0.1751953512430191, "learning_rate": 8.551867268694999e-06, "loss": 0.4426, "step": 7393 }, { "epoch": 1.6493419585099263, "grad_norm": 0.17890918254852295, "learning_rate": 8.54953868710243e-06, "loss": 0.4479, "step": 7394 }, { "epoch": 1.6495650234218158, "grad_norm": 0.17455795407295227, "learning_rate": 8.54721018584553e-06, "loss": 0.4452, "step": 7395 }, { "epoch": 1.6497880883337053, "grad_norm": 0.1724436730146408, "learning_rate": 8.54488176505327e-06, "loss": 0.4326, "step": 7396 }, { "epoch": 1.6500111532455946, "grad_norm": 0.17377914488315582, "learning_rate": 8.542553424854608e-06, "loss": 0.4589, "step": 7397 }, { "epoch": 1.6502342181574838, "grad_norm": 0.17500680685043335, "learning_rate": 8.540225165378509e-06, "loss": 0.432, "step": 7398 }, { "epoch": 1.6504572830693731, "grad_norm": 0.17203043401241302, "learning_rate": 8.53789698675392e-06, "loss": 0.4801, "step": 7399 }, { "epoch": 1.6506803479812624, "grad_norm": 0.17373989522457123, "learning_rate": 8.535568889109794e-06, "loss": 0.4431, "step": 7400 }, { "epoch": 1.650903412893152, "grad_norm": 0.1792883723974228, "learning_rate": 8.533240872575073e-06, "loss": 0.4531, "step": 7401 }, { "epoch": 1.6511264778050414, "grad_norm": 0.17690402269363403, "learning_rate": 8.530912937278702e-06, "loss": 0.4548, "step": 7402 }, { "epoch": 1.6513495427169307, "grad_norm": 0.17443451285362244, "learning_rate": 8.52858508334961e-06, "loss": 0.4478, "step": 7403 }, { "epoch": 1.65157260762882, "grad_norm": 0.1743832528591156, "learning_rate": 8.526257310916734e-06, "loss": 0.4732, "step": 7404 }, { "epoch": 1.6517956725407092, "grad_norm": 0.1756555140018463, "learning_rate": 8.523929620108996e-06, "loss": 0.4439, "step": 7405 }, { "epoch": 1.6520187374525988, "grad_norm": 0.19035564363002777, "learning_rate": 8.521602011055324e-06, "loss": 0.4677, "step": 7406 }, { "epoch": 1.652241802364488, "grad_norm": 0.18924179673194885, "learning_rate": 8.519274483884627e-06, "loss": 0.463, "step": 7407 }, { "epoch": 1.6524648672763775, "grad_norm": 0.18173234164714813, "learning_rate": 8.516947038725826e-06, "loss": 0.4318, "step": 7408 }, { "epoch": 1.6526879321882668, "grad_norm": 0.175716370344162, "learning_rate": 8.514619675707828e-06, "loss": 0.4292, "step": 7409 }, { "epoch": 1.652910997100156, "grad_norm": 0.17146100103855133, "learning_rate": 8.512292394959533e-06, "loss": 0.4241, "step": 7410 }, { "epoch": 1.6531340620120454, "grad_norm": 0.17284603416919708, "learning_rate": 8.509965196609846e-06, "loss": 0.4442, "step": 7411 }, { "epoch": 1.6533571269239349, "grad_norm": 0.18271994590759277, "learning_rate": 8.507638080787657e-06, "loss": 0.451, "step": 7412 }, { "epoch": 1.6535801918358244, "grad_norm": 0.17891302704811096, "learning_rate": 8.50531104762186e-06, "loss": 0.4496, "step": 7413 }, { "epoch": 1.6538032567477137, "grad_norm": 0.19851596653461456, "learning_rate": 8.502984097241338e-06, "loss": 0.4715, "step": 7414 }, { "epoch": 1.654026321659603, "grad_norm": 0.18337850272655487, "learning_rate": 8.500657229774975e-06, "loss": 0.439, "step": 7415 }, { "epoch": 1.6542493865714922, "grad_norm": 0.18898166716098785, "learning_rate": 8.498330445351643e-06, "loss": 0.4641, "step": 7416 }, { "epoch": 1.6544724514833815, "grad_norm": 0.18804045021533966, "learning_rate": 8.49600374410022e-06, "loss": 0.4488, "step": 7417 }, { "epoch": 1.654695516395271, "grad_norm": 0.18279941380023956, "learning_rate": 8.493677126149569e-06, "loss": 0.4395, "step": 7418 }, { "epoch": 1.6549185813071605, "grad_norm": 0.18200235068798065, "learning_rate": 8.491350591628554e-06, "loss": 0.4475, "step": 7419 }, { "epoch": 1.6551416462190498, "grad_norm": 0.1727295070886612, "learning_rate": 8.489024140666032e-06, "loss": 0.4627, "step": 7420 }, { "epoch": 1.655364711130939, "grad_norm": 0.17785604298114777, "learning_rate": 8.486697773390859e-06, "loss": 0.4605, "step": 7421 }, { "epoch": 1.6555877760428284, "grad_norm": 0.18455497920513153, "learning_rate": 8.48437148993188e-06, "loss": 0.4463, "step": 7422 }, { "epoch": 1.6558108409547179, "grad_norm": 0.17284157872200012, "learning_rate": 8.482045290417946e-06, "loss": 0.4735, "step": 7423 }, { "epoch": 1.6560339058666071, "grad_norm": 0.1757441759109497, "learning_rate": 8.479719174977887e-06, "loss": 0.4296, "step": 7424 }, { "epoch": 1.6562569707784967, "grad_norm": 0.17652331292629242, "learning_rate": 8.477393143740546e-06, "loss": 0.4289, "step": 7425 }, { "epoch": 1.656480035690386, "grad_norm": 0.17844471335411072, "learning_rate": 8.475067196834749e-06, "loss": 0.4624, "step": 7426 }, { "epoch": 1.6567031006022752, "grad_norm": 0.1853104829788208, "learning_rate": 8.472741334389322e-06, "loss": 0.444, "step": 7427 }, { "epoch": 1.6569261655141645, "grad_norm": 0.20148655772209167, "learning_rate": 8.47041555653309e-06, "loss": 0.4412, "step": 7428 }, { "epoch": 1.657149230426054, "grad_norm": 0.17949336767196655, "learning_rate": 8.468089863394864e-06, "loss": 0.4799, "step": 7429 }, { "epoch": 1.6573722953379435, "grad_norm": 0.1833929866552353, "learning_rate": 8.465764255103457e-06, "loss": 0.459, "step": 7430 }, { "epoch": 1.6575953602498328, "grad_norm": 0.16851405799388885, "learning_rate": 8.463438731787677e-06, "loss": 0.4509, "step": 7431 }, { "epoch": 1.657818425161722, "grad_norm": 0.17039094865322113, "learning_rate": 8.461113293576325e-06, "loss": 0.4591, "step": 7432 }, { "epoch": 1.6580414900736113, "grad_norm": 0.1713404804468155, "learning_rate": 8.4587879405982e-06, "loss": 0.4469, "step": 7433 }, { "epoch": 1.6582645549855006, "grad_norm": 0.1776912361383438, "learning_rate": 8.456462672982092e-06, "loss": 0.4526, "step": 7434 }, { "epoch": 1.6584876198973901, "grad_norm": 0.1807934045791626, "learning_rate": 8.45413749085679e-06, "loss": 0.4689, "step": 7435 }, { "epoch": 1.6587106848092796, "grad_norm": 0.17492172122001648, "learning_rate": 8.451812394351078e-06, "loss": 0.4605, "step": 7436 }, { "epoch": 1.658933749721169, "grad_norm": 0.1770438402891159, "learning_rate": 8.449487383593734e-06, "loss": 0.4603, "step": 7437 }, { "epoch": 1.6591568146330582, "grad_norm": 0.18041646480560303, "learning_rate": 8.447162458713534e-06, "loss": 0.4534, "step": 7438 }, { "epoch": 1.6593798795449475, "grad_norm": 0.1849108189344406, "learning_rate": 8.444837619839243e-06, "loss": 0.4547, "step": 7439 }, { "epoch": 1.659602944456837, "grad_norm": 0.18359196186065674, "learning_rate": 8.442512867099627e-06, "loss": 0.4545, "step": 7440 }, { "epoch": 1.6598260093687263, "grad_norm": 0.19102846086025238, "learning_rate": 8.440188200623445e-06, "loss": 0.4257, "step": 7441 }, { "epoch": 1.6600490742806158, "grad_norm": 0.1707092970609665, "learning_rate": 8.437863620539454e-06, "loss": 0.4307, "step": 7442 }, { "epoch": 1.660272139192505, "grad_norm": 0.18710510432720184, "learning_rate": 8.4355391269764e-06, "loss": 0.4453, "step": 7443 }, { "epoch": 1.6604952041043943, "grad_norm": 0.18393000960350037, "learning_rate": 8.43321472006303e-06, "loss": 0.448, "step": 7444 }, { "epoch": 1.6607182690162836, "grad_norm": 0.16592559218406677, "learning_rate": 8.430890399928085e-06, "loss": 0.4243, "step": 7445 }, { "epoch": 1.6609413339281731, "grad_norm": 0.16804459691047668, "learning_rate": 8.4285661667003e-06, "loss": 0.4357, "step": 7446 }, { "epoch": 1.6611643988400626, "grad_norm": 0.1712721884250641, "learning_rate": 8.426242020508405e-06, "loss": 0.4403, "step": 7447 }, { "epoch": 1.661387463751952, "grad_norm": 0.17568182945251465, "learning_rate": 8.423917961481124e-06, "loss": 0.4489, "step": 7448 }, { "epoch": 1.6616105286638412, "grad_norm": 0.16803157329559326, "learning_rate": 8.421593989747184e-06, "loss": 0.4293, "step": 7449 }, { "epoch": 1.6618335935757305, "grad_norm": 0.18901732563972473, "learning_rate": 8.419270105435294e-06, "loss": 0.4637, "step": 7450 }, { "epoch": 1.6620566584876197, "grad_norm": 0.18891791999340057, "learning_rate": 8.416946308674173e-06, "loss": 0.4602, "step": 7451 }, { "epoch": 1.6622797233995092, "grad_norm": 0.17820779979228973, "learning_rate": 8.414622599592518e-06, "loss": 0.4555, "step": 7452 }, { "epoch": 1.6625027883113987, "grad_norm": 0.18127723038196564, "learning_rate": 8.41229897831904e-06, "loss": 0.4512, "step": 7453 }, { "epoch": 1.662725853223288, "grad_norm": 0.18046575784683228, "learning_rate": 8.40997544498243e-06, "loss": 0.4635, "step": 7454 }, { "epoch": 1.6629489181351773, "grad_norm": 0.1841399371623993, "learning_rate": 8.407651999711383e-06, "loss": 0.4605, "step": 7455 }, { "epoch": 1.6631719830470666, "grad_norm": 0.17511752247810364, "learning_rate": 8.405328642634582e-06, "loss": 0.4308, "step": 7456 }, { "epoch": 1.663395047958956, "grad_norm": 0.17428848147392273, "learning_rate": 8.403005373880713e-06, "loss": 0.4426, "step": 7457 }, { "epoch": 1.6636181128708454, "grad_norm": 0.18597403168678284, "learning_rate": 8.400682193578451e-06, "loss": 0.4476, "step": 7458 }, { "epoch": 1.6638411777827349, "grad_norm": 0.17569150030612946, "learning_rate": 8.398359101856471e-06, "loss": 0.4161, "step": 7459 }, { "epoch": 1.6640642426946242, "grad_norm": 0.17648978531360626, "learning_rate": 8.396036098843438e-06, "loss": 0.4596, "step": 7460 }, { "epoch": 1.6642873076065134, "grad_norm": 0.1786419153213501, "learning_rate": 8.393713184668015e-06, "loss": 0.4623, "step": 7461 }, { "epoch": 1.6645103725184027, "grad_norm": 0.18259434401988983, "learning_rate": 8.391390359458858e-06, "loss": 0.4703, "step": 7462 }, { "epoch": 1.6647334374302922, "grad_norm": 0.18008796870708466, "learning_rate": 8.389067623344625e-06, "loss": 0.464, "step": 7463 }, { "epoch": 1.6649565023421817, "grad_norm": 0.20690587162971497, "learning_rate": 8.386744976453958e-06, "loss": 0.457, "step": 7464 }, { "epoch": 1.665179567254071, "grad_norm": 0.21626955270767212, "learning_rate": 8.384422418915503e-06, "loss": 0.4171, "step": 7465 }, { "epoch": 1.6654026321659603, "grad_norm": 0.16983038187026978, "learning_rate": 8.382099950857895e-06, "loss": 0.434, "step": 7466 }, { "epoch": 1.6656256970778496, "grad_norm": 0.18319158256053925, "learning_rate": 8.379777572409771e-06, "loss": 0.4808, "step": 7467 }, { "epoch": 1.6658487619897389, "grad_norm": 0.2683338522911072, "learning_rate": 8.377455283699758e-06, "loss": 0.4772, "step": 7468 }, { "epoch": 1.6660718269016284, "grad_norm": 0.18146146833896637, "learning_rate": 8.375133084856475e-06, "loss": 0.4226, "step": 7469 }, { "epoch": 1.6662948918135179, "grad_norm": 0.17817704379558563, "learning_rate": 8.372810976008543e-06, "loss": 0.4272, "step": 7470 }, { "epoch": 1.6665179567254071, "grad_norm": 0.18885549902915955, "learning_rate": 8.370488957284574e-06, "loss": 0.4722, "step": 7471 }, { "epoch": 1.6667410216372964, "grad_norm": 0.18117155134677887, "learning_rate": 8.368167028813176e-06, "loss": 0.4377, "step": 7472 }, { "epoch": 1.6669640865491857, "grad_norm": 0.19117340445518494, "learning_rate": 8.365845190722955e-06, "loss": 0.4563, "step": 7473 }, { "epoch": 1.6671871514610752, "grad_norm": 0.1751892864704132, "learning_rate": 8.363523443142503e-06, "loss": 0.4433, "step": 7474 }, { "epoch": 1.6674102163729645, "grad_norm": 0.18443763256072998, "learning_rate": 8.36120178620042e-06, "loss": 0.439, "step": 7475 }, { "epoch": 1.667633281284854, "grad_norm": 0.18026113510131836, "learning_rate": 8.358880220025288e-06, "loss": 0.4275, "step": 7476 }, { "epoch": 1.6678563461967433, "grad_norm": 0.17562150955200195, "learning_rate": 8.356558744745695e-06, "loss": 0.4685, "step": 7477 }, { "epoch": 1.6680794111086326, "grad_norm": 0.1727527379989624, "learning_rate": 8.354237360490212e-06, "loss": 0.4602, "step": 7478 }, { "epoch": 1.6683024760205218, "grad_norm": 0.1706780195236206, "learning_rate": 8.351916067387421e-06, "loss": 0.4431, "step": 7479 }, { "epoch": 1.6685255409324113, "grad_norm": 0.19286386668682098, "learning_rate": 8.349594865565882e-06, "loss": 0.4496, "step": 7480 }, { "epoch": 1.6687486058443008, "grad_norm": 0.2684648931026459, "learning_rate": 8.347273755154164e-06, "loss": 0.4839, "step": 7481 }, { "epoch": 1.6689716707561901, "grad_norm": 0.1720086932182312, "learning_rate": 8.344952736280819e-06, "loss": 0.4253, "step": 7482 }, { "epoch": 1.6691947356680794, "grad_norm": 0.17519108951091766, "learning_rate": 8.342631809074403e-06, "loss": 0.4363, "step": 7483 }, { "epoch": 1.6694178005799687, "grad_norm": 0.18655794858932495, "learning_rate": 8.340310973663461e-06, "loss": 0.4811, "step": 7484 }, { "epoch": 1.669640865491858, "grad_norm": 0.16531935334205627, "learning_rate": 8.33799023017654e-06, "loss": 0.431, "step": 7485 }, { "epoch": 1.6698639304037475, "grad_norm": 0.2258535474538803, "learning_rate": 8.335669578742172e-06, "loss": 0.4407, "step": 7486 }, { "epoch": 1.670086995315637, "grad_norm": 0.17526793479919434, "learning_rate": 8.333349019488893e-06, "loss": 0.4531, "step": 7487 }, { "epoch": 1.6703100602275263, "grad_norm": 0.17840759456157684, "learning_rate": 8.331028552545228e-06, "loss": 0.4374, "step": 7488 }, { "epoch": 1.6705331251394155, "grad_norm": 0.1826542466878891, "learning_rate": 8.328708178039702e-06, "loss": 0.4881, "step": 7489 }, { "epoch": 1.6707561900513048, "grad_norm": 0.17568182945251465, "learning_rate": 8.326387896100827e-06, "loss": 0.4607, "step": 7490 }, { "epoch": 1.6709792549631943, "grad_norm": 0.1761264204978943, "learning_rate": 8.324067706857121e-06, "loss": 0.4399, "step": 7491 }, { "epoch": 1.6712023198750836, "grad_norm": 0.16690705716609955, "learning_rate": 8.321747610437084e-06, "loss": 0.4417, "step": 7492 }, { "epoch": 1.6714253847869731, "grad_norm": 0.17830541729927063, "learning_rate": 8.319427606969223e-06, "loss": 0.4437, "step": 7493 }, { "epoch": 1.6716484496988624, "grad_norm": 0.271932452917099, "learning_rate": 8.317107696582031e-06, "loss": 0.4486, "step": 7494 }, { "epoch": 1.6718715146107517, "grad_norm": 0.19442439079284668, "learning_rate": 8.314787879404002e-06, "loss": 0.474, "step": 7495 }, { "epoch": 1.672094579522641, "grad_norm": 0.1841454654932022, "learning_rate": 8.312468155563623e-06, "loss": 0.4689, "step": 7496 }, { "epoch": 1.6723176444345305, "grad_norm": 0.18820422887802124, "learning_rate": 8.310148525189367e-06, "loss": 0.4349, "step": 7497 }, { "epoch": 1.67254070934642, "grad_norm": 0.17644047737121582, "learning_rate": 8.30782898840972e-06, "loss": 0.4665, "step": 7498 }, { "epoch": 1.6727637742583092, "grad_norm": 0.17768198251724243, "learning_rate": 8.305509545353144e-06, "loss": 0.4551, "step": 7499 }, { "epoch": 1.6729868391701985, "grad_norm": 0.18405990302562714, "learning_rate": 8.303190196148112e-06, "loss": 0.4434, "step": 7500 }, { "epoch": 1.6732099040820878, "grad_norm": 0.17325492203235626, "learning_rate": 8.300870940923077e-06, "loss": 0.4501, "step": 7501 }, { "epoch": 1.673432968993977, "grad_norm": 0.18856076896190643, "learning_rate": 8.2985517798065e-06, "loss": 0.4431, "step": 7502 }, { "epoch": 1.6736560339058666, "grad_norm": 0.1935485303401947, "learning_rate": 8.296232712926826e-06, "loss": 0.4648, "step": 7503 }, { "epoch": 1.673879098817756, "grad_norm": 0.17944234609603882, "learning_rate": 8.293913740412503e-06, "loss": 0.4561, "step": 7504 }, { "epoch": 1.6741021637296454, "grad_norm": 0.17433197796344757, "learning_rate": 8.291594862391966e-06, "loss": 0.4551, "step": 7505 }, { "epoch": 1.6743252286415347, "grad_norm": 0.18297721445560455, "learning_rate": 8.289276078993655e-06, "loss": 0.4609, "step": 7506 }, { "epoch": 1.674548293553424, "grad_norm": 0.17488986253738403, "learning_rate": 8.286957390345994e-06, "loss": 0.4294, "step": 7507 }, { "epoch": 1.6747713584653134, "grad_norm": 0.1750224232673645, "learning_rate": 8.28463879657741e-06, "loss": 0.4609, "step": 7508 }, { "epoch": 1.6749944233772027, "grad_norm": 0.18751104176044464, "learning_rate": 8.282320297816315e-06, "loss": 0.4519, "step": 7509 }, { "epoch": 1.6752174882890922, "grad_norm": 0.17194952070713043, "learning_rate": 8.280001894191132e-06, "loss": 0.4437, "step": 7510 }, { "epoch": 1.6754405532009815, "grad_norm": 0.17177167534828186, "learning_rate": 8.277683585830259e-06, "loss": 0.4455, "step": 7511 }, { "epoch": 1.6756636181128708, "grad_norm": 0.17539672553539276, "learning_rate": 8.275365372862106e-06, "loss": 0.4598, "step": 7512 }, { "epoch": 1.67588668302476, "grad_norm": 0.17903223633766174, "learning_rate": 8.273047255415066e-06, "loss": 0.4473, "step": 7513 }, { "epoch": 1.6761097479366496, "grad_norm": 0.17606431245803833, "learning_rate": 8.27072923361753e-06, "loss": 0.4592, "step": 7514 }, { "epoch": 1.676332812848539, "grad_norm": 0.1900903880596161, "learning_rate": 8.26841130759789e-06, "loss": 0.4735, "step": 7515 }, { "epoch": 1.6765558777604284, "grad_norm": 0.18751583993434906, "learning_rate": 8.26609347748452e-06, "loss": 0.4795, "step": 7516 }, { "epoch": 1.6767789426723176, "grad_norm": 0.18636786937713623, "learning_rate": 8.263775743405804e-06, "loss": 0.4474, "step": 7517 }, { "epoch": 1.677002007584207, "grad_norm": 0.1759779453277588, "learning_rate": 8.261458105490104e-06, "loss": 0.4613, "step": 7518 }, { "epoch": 1.6772250724960962, "grad_norm": 0.16955047845840454, "learning_rate": 8.259140563865796e-06, "loss": 0.4526, "step": 7519 }, { "epoch": 1.6774481374079857, "grad_norm": 0.18776053190231323, "learning_rate": 8.256823118661228e-06, "loss": 0.4265, "step": 7520 }, { "epoch": 1.6776712023198752, "grad_norm": 0.17667481303215027, "learning_rate": 8.254505770004764e-06, "loss": 0.4782, "step": 7521 }, { "epoch": 1.6778942672317645, "grad_norm": 0.17100000381469727, "learning_rate": 8.252188518024748e-06, "loss": 0.464, "step": 7522 }, { "epoch": 1.6781173321436538, "grad_norm": 0.16992557048797607, "learning_rate": 8.24987136284953e-06, "loss": 0.4451, "step": 7523 }, { "epoch": 1.678340397055543, "grad_norm": 0.18691998720169067, "learning_rate": 8.247554304607442e-06, "loss": 0.4701, "step": 7524 }, { "epoch": 1.6785634619674326, "grad_norm": 0.16606706380844116, "learning_rate": 8.24523734342682e-06, "loss": 0.4341, "step": 7525 }, { "epoch": 1.6787865268793218, "grad_norm": 0.16912147402763367, "learning_rate": 8.242920479435991e-06, "loss": 0.4381, "step": 7526 }, { "epoch": 1.6790095917912113, "grad_norm": 0.1704743504524231, "learning_rate": 8.24060371276328e-06, "loss": 0.4458, "step": 7527 }, { "epoch": 1.6792326567031006, "grad_norm": 0.18589746952056885, "learning_rate": 8.238287043537e-06, "loss": 0.4349, "step": 7528 }, { "epoch": 1.67945572161499, "grad_norm": 0.17729128897190094, "learning_rate": 8.235970471885468e-06, "loss": 0.446, "step": 7529 }, { "epoch": 1.6796787865268792, "grad_norm": 0.17116542160511017, "learning_rate": 8.233653997936985e-06, "loss": 0.42, "step": 7530 }, { "epoch": 1.6799018514387687, "grad_norm": 0.1848667562007904, "learning_rate": 8.231337621819858e-06, "loss": 0.4622, "step": 7531 }, { "epoch": 1.6801249163506582, "grad_norm": 0.16781508922576904, "learning_rate": 8.229021343662376e-06, "loss": 0.4326, "step": 7532 }, { "epoch": 1.6803479812625475, "grad_norm": 0.1825597584247589, "learning_rate": 8.22670516359283e-06, "loss": 0.4471, "step": 7533 }, { "epoch": 1.6805710461744368, "grad_norm": 0.17909115552902222, "learning_rate": 8.22438908173951e-06, "loss": 0.464, "step": 7534 }, { "epoch": 1.680794111086326, "grad_norm": 0.16868308186531067, "learning_rate": 8.22207309823069e-06, "loss": 0.4446, "step": 7535 }, { "epoch": 1.6810171759982153, "grad_norm": 0.17775742709636688, "learning_rate": 8.219757213194647e-06, "loss": 0.4638, "step": 7536 }, { "epoch": 1.6812402409101048, "grad_norm": 0.1721707284450531, "learning_rate": 8.217441426759645e-06, "loss": 0.4598, "step": 7537 }, { "epoch": 1.6814633058219943, "grad_norm": 0.17508956789970398, "learning_rate": 8.215125739053953e-06, "loss": 0.4804, "step": 7538 }, { "epoch": 1.6816863707338836, "grad_norm": 0.18540501594543457, "learning_rate": 8.21281015020582e-06, "loss": 0.457, "step": 7539 }, { "epoch": 1.6819094356457729, "grad_norm": 0.19119501113891602, "learning_rate": 8.210494660343508e-06, "loss": 0.4486, "step": 7540 }, { "epoch": 1.6821325005576622, "grad_norm": 0.18080313503742218, "learning_rate": 8.208179269595255e-06, "loss": 0.4727, "step": 7541 }, { "epoch": 1.6823555654695517, "grad_norm": 0.192534402012825, "learning_rate": 8.205863978089308e-06, "loss": 0.4712, "step": 7542 }, { "epoch": 1.682578630381441, "grad_norm": 0.16961976885795593, "learning_rate": 8.203548785953896e-06, "loss": 0.4169, "step": 7543 }, { "epoch": 1.6828016952933305, "grad_norm": 0.17554304003715515, "learning_rate": 8.201233693317254e-06, "loss": 0.4448, "step": 7544 }, { "epoch": 1.6830247602052197, "grad_norm": 0.1828324943780899, "learning_rate": 8.198918700307604e-06, "loss": 0.4758, "step": 7545 }, { "epoch": 1.683247825117109, "grad_norm": 0.1798938661813736, "learning_rate": 8.196603807053167e-06, "loss": 0.4567, "step": 7546 }, { "epoch": 1.6834708900289983, "grad_norm": 0.178616002202034, "learning_rate": 8.194289013682154e-06, "loss": 0.4547, "step": 7547 }, { "epoch": 1.6836939549408878, "grad_norm": 0.20149093866348267, "learning_rate": 8.191974320322776e-06, "loss": 0.4427, "step": 7548 }, { "epoch": 1.6839170198527773, "grad_norm": 0.17913025617599487, "learning_rate": 8.189659727103233e-06, "loss": 0.4371, "step": 7549 }, { "epoch": 1.6841400847646666, "grad_norm": 0.18823112547397614, "learning_rate": 8.18734523415172e-06, "loss": 0.4548, "step": 7550 }, { "epoch": 1.6843631496765559, "grad_norm": 0.17050491273403168, "learning_rate": 8.185030841596431e-06, "loss": 0.4481, "step": 7551 }, { "epoch": 1.6845862145884452, "grad_norm": 0.1750423163175583, "learning_rate": 8.182716549565548e-06, "loss": 0.4527, "step": 7552 }, { "epoch": 1.6848092795003344, "grad_norm": 0.17691361904144287, "learning_rate": 8.180402358187256e-06, "loss": 0.4564, "step": 7553 }, { "epoch": 1.685032344412224, "grad_norm": 0.17147858440876007, "learning_rate": 8.178088267589725e-06, "loss": 0.4572, "step": 7554 }, { "epoch": 1.6852554093241134, "grad_norm": 0.18109926581382751, "learning_rate": 8.175774277901128e-06, "loss": 0.4387, "step": 7555 }, { "epoch": 1.6854784742360027, "grad_norm": 0.17862330377101898, "learning_rate": 8.173460389249625e-06, "loss": 0.4453, "step": 7556 }, { "epoch": 1.685701539147892, "grad_norm": 0.18046163022518158, "learning_rate": 8.171146601763374e-06, "loss": 0.4471, "step": 7557 }, { "epoch": 1.6859246040597813, "grad_norm": 0.172270730137825, "learning_rate": 8.168832915570531e-06, "loss": 0.4167, "step": 7558 }, { "epoch": 1.6861476689716708, "grad_norm": 0.17891794443130493, "learning_rate": 8.166519330799237e-06, "loss": 0.4455, "step": 7559 }, { "epoch": 1.68637073388356, "grad_norm": 0.1716044843196869, "learning_rate": 8.16420584757764e-06, "loss": 0.4451, "step": 7560 }, { "epoch": 1.6865937987954496, "grad_norm": 0.17391957342624664, "learning_rate": 8.161892466033865e-06, "loss": 0.4543, "step": 7561 }, { "epoch": 1.6868168637073389, "grad_norm": 0.17183732986450195, "learning_rate": 8.159579186296052e-06, "loss": 0.4653, "step": 7562 }, { "epoch": 1.6870399286192281, "grad_norm": 0.19677892327308655, "learning_rate": 8.157266008492318e-06, "loss": 0.4543, "step": 7563 }, { "epoch": 1.6872629935311174, "grad_norm": 0.18332447111606598, "learning_rate": 8.154952932750784e-06, "loss": 0.4407, "step": 7564 }, { "epoch": 1.687486058443007, "grad_norm": 0.1903562992811203, "learning_rate": 8.152639959199561e-06, "loss": 0.4464, "step": 7565 }, { "epoch": 1.6877091233548964, "grad_norm": 0.22077789902687073, "learning_rate": 8.150327087966761e-06, "loss": 0.4941, "step": 7566 }, { "epoch": 1.6879321882667857, "grad_norm": 0.172964408993721, "learning_rate": 8.148014319180479e-06, "loss": 0.4584, "step": 7567 }, { "epoch": 1.688155253178675, "grad_norm": 0.16977094113826752, "learning_rate": 8.145701652968814e-06, "loss": 0.4347, "step": 7568 }, { "epoch": 1.6883783180905643, "grad_norm": 0.1772548258304596, "learning_rate": 8.143389089459855e-06, "loss": 0.4701, "step": 7569 }, { "epoch": 1.6886013830024535, "grad_norm": 0.16908633708953857, "learning_rate": 8.14107662878169e-06, "loss": 0.4595, "step": 7570 }, { "epoch": 1.688824447914343, "grad_norm": 0.18083757162094116, "learning_rate": 8.138764271062389e-06, "loss": 0.4919, "step": 7571 }, { "epoch": 1.6890475128262326, "grad_norm": 0.18248751759529114, "learning_rate": 8.136452016430035e-06, "loss": 0.4767, "step": 7572 }, { "epoch": 1.6892705777381218, "grad_norm": 0.1837497055530548, "learning_rate": 8.134139865012688e-06, "loss": 0.4462, "step": 7573 }, { "epoch": 1.6894936426500111, "grad_norm": 0.1747952401638031, "learning_rate": 8.131827816938412e-06, "loss": 0.4586, "step": 7574 }, { "epoch": 1.6897167075619004, "grad_norm": 0.16300569474697113, "learning_rate": 8.129515872335263e-06, "loss": 0.4319, "step": 7575 }, { "epoch": 1.68993977247379, "grad_norm": 0.17720209062099457, "learning_rate": 8.127204031331293e-06, "loss": 0.4447, "step": 7576 }, { "epoch": 1.6901628373856792, "grad_norm": 0.1850651055574417, "learning_rate": 8.12489229405454e-06, "loss": 0.4473, "step": 7577 }, { "epoch": 1.6903859022975687, "grad_norm": 0.17101094126701355, "learning_rate": 8.122580660633048e-06, "loss": 0.4259, "step": 7578 }, { "epoch": 1.690608967209458, "grad_norm": 0.17117485404014587, "learning_rate": 8.12026913119485e-06, "loss": 0.4467, "step": 7579 }, { "epoch": 1.6908320321213473, "grad_norm": 0.18159452080726624, "learning_rate": 8.117957705867971e-06, "loss": 0.4606, "step": 7580 }, { "epoch": 1.6910550970332365, "grad_norm": 0.17214803397655487, "learning_rate": 8.115646384780434e-06, "loss": 0.4454, "step": 7581 }, { "epoch": 1.691278161945126, "grad_norm": 0.17703603208065033, "learning_rate": 8.11333516806025e-06, "loss": 0.4455, "step": 7582 }, { "epoch": 1.6915012268570155, "grad_norm": 0.18035492300987244, "learning_rate": 8.111024055835436e-06, "loss": 0.4668, "step": 7583 }, { "epoch": 1.6917242917689048, "grad_norm": 0.17585453391075134, "learning_rate": 8.108713048233988e-06, "loss": 0.4267, "step": 7584 }, { "epoch": 1.691947356680794, "grad_norm": 0.184658944606781, "learning_rate": 8.106402145383911e-06, "loss": 0.4665, "step": 7585 }, { "epoch": 1.6921704215926834, "grad_norm": 0.19270674884319305, "learning_rate": 8.104091347413192e-06, "loss": 0.4386, "step": 7586 }, { "epoch": 1.6923934865045729, "grad_norm": 0.1800229251384735, "learning_rate": 8.101780654449822e-06, "loss": 0.4618, "step": 7587 }, { "epoch": 1.6926165514164622, "grad_norm": 0.18103908002376556, "learning_rate": 8.099470066621778e-06, "loss": 0.4513, "step": 7588 }, { "epoch": 1.6928396163283517, "grad_norm": 0.16606682538986206, "learning_rate": 8.09715958405704e-06, "loss": 0.4251, "step": 7589 }, { "epoch": 1.693062681240241, "grad_norm": 0.17416086792945862, "learning_rate": 8.094849206883569e-06, "loss": 0.4473, "step": 7590 }, { "epoch": 1.6932857461521302, "grad_norm": 0.21117763221263885, "learning_rate": 8.092538935229336e-06, "loss": 0.4578, "step": 7591 }, { "epoch": 1.6935088110640195, "grad_norm": 0.17628377676010132, "learning_rate": 8.090228769222292e-06, "loss": 0.4716, "step": 7592 }, { "epoch": 1.693731875975909, "grad_norm": 0.1700354665517807, "learning_rate": 8.087918708990396e-06, "loss": 0.4444, "step": 7593 }, { "epoch": 1.6939549408877983, "grad_norm": 0.18636515736579895, "learning_rate": 8.085608754661585e-06, "loss": 0.4843, "step": 7594 }, { "epoch": 1.6941780057996878, "grad_norm": 0.1767621785402298, "learning_rate": 8.083298906363806e-06, "loss": 0.4422, "step": 7595 }, { "epoch": 1.694401070711577, "grad_norm": 0.1970623880624771, "learning_rate": 8.080989164224988e-06, "loss": 0.4432, "step": 7596 }, { "epoch": 1.6946241356234664, "grad_norm": 0.1670975536108017, "learning_rate": 8.078679528373063e-06, "loss": 0.4353, "step": 7597 }, { "epoch": 1.6948472005353556, "grad_norm": 0.18546359241008759, "learning_rate": 8.076369998935951e-06, "loss": 0.4501, "step": 7598 }, { "epoch": 1.6950702654472452, "grad_norm": 0.17603303492069244, "learning_rate": 8.074060576041566e-06, "loss": 0.4451, "step": 7599 }, { "epoch": 1.6952933303591347, "grad_norm": 0.17194673418998718, "learning_rate": 8.071751259817825e-06, "loss": 0.431, "step": 7600 }, { "epoch": 1.695516395271024, "grad_norm": 0.1741739809513092, "learning_rate": 8.069442050392625e-06, "loss": 0.4715, "step": 7601 }, { "epoch": 1.6957394601829132, "grad_norm": 0.24975387752056122, "learning_rate": 8.067132947893872e-06, "loss": 0.4369, "step": 7602 }, { "epoch": 1.6959625250948025, "grad_norm": 0.18625226616859436, "learning_rate": 8.064823952449449e-06, "loss": 0.4464, "step": 7603 }, { "epoch": 1.696185590006692, "grad_norm": 0.1777094006538391, "learning_rate": 8.062515064187253e-06, "loss": 0.4496, "step": 7604 }, { "epoch": 1.6964086549185813, "grad_norm": 0.1841573417186737, "learning_rate": 8.060206283235159e-06, "loss": 0.4731, "step": 7605 }, { "epoch": 1.6966317198304708, "grad_norm": 0.17774668335914612, "learning_rate": 8.05789760972104e-06, "loss": 0.4905, "step": 7606 }, { "epoch": 1.69685478474236, "grad_norm": 0.16449688374996185, "learning_rate": 8.055589043772772e-06, "loss": 0.4542, "step": 7607 }, { "epoch": 1.6970778496542493, "grad_norm": 0.1715865582227707, "learning_rate": 8.053280585518211e-06, "loss": 0.4448, "step": 7608 }, { "epoch": 1.6973009145661386, "grad_norm": 0.18522848188877106, "learning_rate": 8.050972235085217e-06, "loss": 0.4821, "step": 7609 }, { "epoch": 1.6975239794780281, "grad_norm": 0.17819975316524506, "learning_rate": 8.048663992601641e-06, "loss": 0.4306, "step": 7610 }, { "epoch": 1.6977470443899174, "grad_norm": 0.1718638390302658, "learning_rate": 8.046355858195325e-06, "loss": 0.4349, "step": 7611 }, { "epoch": 1.697970109301807, "grad_norm": 0.16903117299079895, "learning_rate": 8.044047831994114e-06, "loss": 0.4348, "step": 7612 }, { "epoch": 1.6981931742136962, "grad_norm": 0.1886761337518692, "learning_rate": 8.041739914125835e-06, "loss": 0.4732, "step": 7613 }, { "epoch": 1.6984162391255855, "grad_norm": 0.20157945156097412, "learning_rate": 8.03943210471832e-06, "loss": 0.4639, "step": 7614 }, { "epoch": 1.6986393040374748, "grad_norm": 0.1816253513097763, "learning_rate": 8.037124403899384e-06, "loss": 0.4429, "step": 7615 }, { "epoch": 1.6988623689493643, "grad_norm": 0.17475511133670807, "learning_rate": 8.03481681179685e-06, "loss": 0.4565, "step": 7616 }, { "epoch": 1.6990854338612538, "grad_norm": 0.17711593210697174, "learning_rate": 8.032509328538518e-06, "loss": 0.4488, "step": 7617 }, { "epoch": 1.699308498773143, "grad_norm": 0.18735013902187347, "learning_rate": 8.030201954252198e-06, "loss": 0.4554, "step": 7618 }, { "epoch": 1.6995315636850323, "grad_norm": 0.5531375408172607, "learning_rate": 8.027894689065684e-06, "loss": 0.4486, "step": 7619 }, { "epoch": 1.6997546285969216, "grad_norm": 0.16994529962539673, "learning_rate": 8.025587533106765e-06, "loss": 0.4126, "step": 7620 }, { "epoch": 1.6999776935088111, "grad_norm": 0.18847712874412537, "learning_rate": 8.02328048650323e-06, "loss": 0.4653, "step": 7621 }, { "epoch": 1.7002007584207004, "grad_norm": 0.17005674540996552, "learning_rate": 8.020973549382855e-06, "loss": 0.4099, "step": 7622 }, { "epoch": 1.70042382333259, "grad_norm": 0.18046452105045319, "learning_rate": 8.018666721873414e-06, "loss": 0.423, "step": 7623 }, { "epoch": 1.7006468882444792, "grad_norm": 0.17824363708496094, "learning_rate": 8.016360004102672e-06, "loss": 0.4659, "step": 7624 }, { "epoch": 1.7008699531563685, "grad_norm": 0.16878539323806763, "learning_rate": 8.014053396198392e-06, "loss": 0.4464, "step": 7625 }, { "epoch": 1.7010930180682577, "grad_norm": 0.1796082705259323, "learning_rate": 8.011746898288326e-06, "loss": 0.4493, "step": 7626 }, { "epoch": 1.7013160829801472, "grad_norm": 0.1814991682767868, "learning_rate": 8.009440510500224e-06, "loss": 0.4588, "step": 7627 }, { "epoch": 1.7015391478920365, "grad_norm": 0.1700638234615326, "learning_rate": 8.007134232961828e-06, "loss": 0.4207, "step": 7628 }, { "epoch": 1.701762212803926, "grad_norm": 0.1950269490480423, "learning_rate": 8.004828065800874e-06, "loss": 0.4338, "step": 7629 }, { "epoch": 1.7019852777158153, "grad_norm": 0.17464493215084076, "learning_rate": 8.00252200914509e-06, "loss": 0.4465, "step": 7630 }, { "epoch": 1.7022083426277046, "grad_norm": 0.19278262555599213, "learning_rate": 8.000216063122205e-06, "loss": 0.4549, "step": 7631 }, { "epoch": 1.7024314075395939, "grad_norm": 0.1833004504442215, "learning_rate": 7.997910227859932e-06, "loss": 0.4433, "step": 7632 }, { "epoch": 1.7026544724514834, "grad_norm": 0.17740319669246674, "learning_rate": 7.995604503485984e-06, "loss": 0.4509, "step": 7633 }, { "epoch": 1.7028775373633729, "grad_norm": 0.17956921458244324, "learning_rate": 7.993298890128067e-06, "loss": 0.4465, "step": 7634 }, { "epoch": 1.7031006022752622, "grad_norm": 0.18017272651195526, "learning_rate": 7.99099338791388e-06, "loss": 0.4817, "step": 7635 }, { "epoch": 1.7033236671871514, "grad_norm": 0.17207284271717072, "learning_rate": 7.988687996971116e-06, "loss": 0.4545, "step": 7636 }, { "epoch": 1.7035467320990407, "grad_norm": 0.19489426910877228, "learning_rate": 7.986382717427461e-06, "loss": 0.4867, "step": 7637 }, { "epoch": 1.7037697970109302, "grad_norm": 0.1766097992658615, "learning_rate": 7.984077549410598e-06, "loss": 0.4792, "step": 7638 }, { "epoch": 1.7039928619228195, "grad_norm": 0.17337632179260254, "learning_rate": 7.981772493048203e-06, "loss": 0.4619, "step": 7639 }, { "epoch": 1.704215926834709, "grad_norm": 0.1748560070991516, "learning_rate": 7.97946754846794e-06, "loss": 0.449, "step": 7640 }, { "epoch": 1.7044389917465983, "grad_norm": 0.18279384076595306, "learning_rate": 7.977162715797476e-06, "loss": 0.4498, "step": 7641 }, { "epoch": 1.7046620566584876, "grad_norm": 0.1735672801733017, "learning_rate": 7.97485799516446e-06, "loss": 0.4588, "step": 7642 }, { "epoch": 1.7048851215703769, "grad_norm": 0.17070254683494568, "learning_rate": 7.972553386696553e-06, "loss": 0.466, "step": 7643 }, { "epoch": 1.7051081864822664, "grad_norm": 0.19621379673480988, "learning_rate": 7.970248890521389e-06, "loss": 0.4496, "step": 7644 }, { "epoch": 1.7053312513941556, "grad_norm": 0.18287599086761475, "learning_rate": 7.967944506766611e-06, "loss": 0.449, "step": 7645 }, { "epoch": 1.7055543163060451, "grad_norm": 0.1747177392244339, "learning_rate": 7.965640235559847e-06, "loss": 0.4491, "step": 7646 }, { "epoch": 1.7057773812179344, "grad_norm": 0.1790647655725479, "learning_rate": 7.963336077028725e-06, "loss": 0.4558, "step": 7647 }, { "epoch": 1.7060004461298237, "grad_norm": 0.1670437902212143, "learning_rate": 7.96103203130086e-06, "loss": 0.4331, "step": 7648 }, { "epoch": 1.706223511041713, "grad_norm": 0.18212758004665375, "learning_rate": 7.95872809850387e-06, "loss": 0.4953, "step": 7649 }, { "epoch": 1.7064465759536025, "grad_norm": 0.17667363584041595, "learning_rate": 7.956424278765354e-06, "loss": 0.4552, "step": 7650 }, { "epoch": 1.706669640865492, "grad_norm": 0.18635283410549164, "learning_rate": 7.95412057221292e-06, "loss": 0.4677, "step": 7651 }, { "epoch": 1.7068927057773813, "grad_norm": 0.1755317598581314, "learning_rate": 7.951816978974154e-06, "loss": 0.4372, "step": 7652 }, { "epoch": 1.7071157706892706, "grad_norm": 0.1793239861726761, "learning_rate": 7.949513499176651e-06, "loss": 0.4509, "step": 7653 }, { "epoch": 1.7073388356011598, "grad_norm": 0.17480173707008362, "learning_rate": 7.947210132947984e-06, "loss": 0.4487, "step": 7654 }, { "epoch": 1.7075619005130493, "grad_norm": 0.19050490856170654, "learning_rate": 7.944906880415738e-06, "loss": 0.4449, "step": 7655 }, { "epoch": 1.7077849654249386, "grad_norm": 0.17552480101585388, "learning_rate": 7.94260374170747e-06, "loss": 0.4457, "step": 7656 }, { "epoch": 1.7080080303368281, "grad_norm": 0.17692866921424866, "learning_rate": 7.940300716950753e-06, "loss": 0.4179, "step": 7657 }, { "epoch": 1.7082310952487174, "grad_norm": 0.18498113751411438, "learning_rate": 7.937997806273135e-06, "loss": 0.4221, "step": 7658 }, { "epoch": 1.7084541601606067, "grad_norm": 0.18404224514961243, "learning_rate": 7.935695009802172e-06, "loss": 0.4682, "step": 7659 }, { "epoch": 1.708677225072496, "grad_norm": 0.18323245644569397, "learning_rate": 7.933392327665403e-06, "loss": 0.4541, "step": 7660 }, { "epoch": 1.7089002899843855, "grad_norm": 0.17359468340873718, "learning_rate": 7.931089759990367e-06, "loss": 0.4504, "step": 7661 }, { "epoch": 1.709123354896275, "grad_norm": 0.1712869256734848, "learning_rate": 7.928787306904593e-06, "loss": 0.45, "step": 7662 }, { "epoch": 1.7093464198081643, "grad_norm": 0.17846348881721497, "learning_rate": 7.926484968535604e-06, "loss": 0.4644, "step": 7663 }, { "epoch": 1.7095694847200535, "grad_norm": 0.18150021135807037, "learning_rate": 7.924182745010926e-06, "loss": 0.4206, "step": 7664 }, { "epoch": 1.7097925496319428, "grad_norm": 0.17358383536338806, "learning_rate": 7.921880636458061e-06, "loss": 0.4492, "step": 7665 }, { "epoch": 1.710015614543832, "grad_norm": 0.18115226924419403, "learning_rate": 7.919578643004519e-06, "loss": 0.4601, "step": 7666 }, { "epoch": 1.7102386794557216, "grad_norm": 0.17906229197978973, "learning_rate": 7.917276764777799e-06, "loss": 0.4428, "step": 7667 }, { "epoch": 1.7104617443676111, "grad_norm": 0.1732749044895172, "learning_rate": 7.914975001905393e-06, "loss": 0.4274, "step": 7668 }, { "epoch": 1.7106848092795004, "grad_norm": 0.1772816926240921, "learning_rate": 7.912673354514784e-06, "loss": 0.4574, "step": 7669 }, { "epoch": 1.7109078741913897, "grad_norm": 0.18031185865402222, "learning_rate": 7.910371822733458e-06, "loss": 0.4498, "step": 7670 }, { "epoch": 1.711130939103279, "grad_norm": 0.18245866894721985, "learning_rate": 7.908070406688881e-06, "loss": 0.4271, "step": 7671 }, { "epoch": 1.7113540040151685, "grad_norm": 0.17715051770210266, "learning_rate": 7.905769106508527e-06, "loss": 0.4714, "step": 7672 }, { "epoch": 1.7115770689270577, "grad_norm": 0.18403951823711395, "learning_rate": 7.90346792231985e-06, "loss": 0.4349, "step": 7673 }, { "epoch": 1.7118001338389472, "grad_norm": 0.1747589409351349, "learning_rate": 7.90116685425031e-06, "loss": 0.4318, "step": 7674 }, { "epoch": 1.7120231987508365, "grad_norm": 0.18269647657871246, "learning_rate": 7.898865902427351e-06, "loss": 0.4687, "step": 7675 }, { "epoch": 1.7122462636627258, "grad_norm": 0.18082202970981598, "learning_rate": 7.896565066978416e-06, "loss": 0.4601, "step": 7676 }, { "epoch": 1.712469328574615, "grad_norm": 0.16931264102458954, "learning_rate": 7.894264348030935e-06, "loss": 0.4478, "step": 7677 }, { "epoch": 1.7126923934865046, "grad_norm": 0.1926901787519455, "learning_rate": 7.891963745712344e-06, "loss": 0.4334, "step": 7678 }, { "epoch": 1.712915458398394, "grad_norm": 0.18817317485809326, "learning_rate": 7.88966326015006e-06, "loss": 0.4705, "step": 7679 }, { "epoch": 1.7131385233102834, "grad_norm": 0.18068066239356995, "learning_rate": 7.887362891471499e-06, "loss": 0.4796, "step": 7680 }, { "epoch": 1.7133615882221727, "grad_norm": 0.17936988174915314, "learning_rate": 7.885062639804068e-06, "loss": 0.4743, "step": 7681 }, { "epoch": 1.713584653134062, "grad_norm": 0.17455992102622986, "learning_rate": 7.882762505275175e-06, "loss": 0.4528, "step": 7682 }, { "epoch": 1.7138077180459512, "grad_norm": 0.17338865995407104, "learning_rate": 7.880462488012208e-06, "loss": 0.4324, "step": 7683 }, { "epoch": 1.7140307829578407, "grad_norm": 0.18230746686458588, "learning_rate": 7.878162588142563e-06, "loss": 0.4634, "step": 7684 }, { "epoch": 1.7142538478697302, "grad_norm": 0.21422216296195984, "learning_rate": 7.875862805793622e-06, "loss": 0.4554, "step": 7685 }, { "epoch": 1.7144769127816195, "grad_norm": 0.17937451601028442, "learning_rate": 7.873563141092759e-06, "loss": 0.4646, "step": 7686 }, { "epoch": 1.7146999776935088, "grad_norm": 0.17571154236793518, "learning_rate": 7.871263594167345e-06, "loss": 0.4408, "step": 7687 }, { "epoch": 1.714923042605398, "grad_norm": 0.18515697121620178, "learning_rate": 7.868964165144743e-06, "loss": 0.4778, "step": 7688 }, { "epoch": 1.7151461075172876, "grad_norm": 0.17536167800426483, "learning_rate": 7.866664854152312e-06, "loss": 0.4284, "step": 7689 }, { "epoch": 1.7153691724291769, "grad_norm": 0.17925989627838135, "learning_rate": 7.864365661317399e-06, "loss": 0.4425, "step": 7690 }, { "epoch": 1.7155922373410664, "grad_norm": 0.17760370671749115, "learning_rate": 7.86206658676735e-06, "loss": 0.4521, "step": 7691 }, { "epoch": 1.7158153022529556, "grad_norm": 0.1762177050113678, "learning_rate": 7.8597676306295e-06, "loss": 0.4415, "step": 7692 }, { "epoch": 1.716038367164845, "grad_norm": 0.17917610704898834, "learning_rate": 7.857468793031185e-06, "loss": 0.4356, "step": 7693 }, { "epoch": 1.7162614320767342, "grad_norm": 0.17529255151748657, "learning_rate": 7.855170074099723e-06, "loss": 0.4345, "step": 7694 }, { "epoch": 1.7164844969886237, "grad_norm": 0.17721855640411377, "learning_rate": 7.852871473962435e-06, "loss": 0.4607, "step": 7695 }, { "epoch": 1.7167075619005132, "grad_norm": 0.18101619184017181, "learning_rate": 7.850572992746628e-06, "loss": 0.4568, "step": 7696 }, { "epoch": 1.7169306268124025, "grad_norm": 0.1923847496509552, "learning_rate": 7.848274630579611e-06, "loss": 0.4492, "step": 7697 }, { "epoch": 1.7171536917242918, "grad_norm": 0.1815698742866516, "learning_rate": 7.845976387588679e-06, "loss": 0.4323, "step": 7698 }, { "epoch": 1.717376756636181, "grad_norm": 0.1835760623216629, "learning_rate": 7.843678263901125e-06, "loss": 0.4363, "step": 7699 }, { "epoch": 1.7175998215480703, "grad_norm": 0.1698540896177292, "learning_rate": 7.841380259644231e-06, "loss": 0.4399, "step": 7700 }, { "epoch": 1.7178228864599598, "grad_norm": 0.1790151596069336, "learning_rate": 7.83908237494528e-06, "loss": 0.4504, "step": 7701 }, { "epoch": 1.7180459513718493, "grad_norm": 0.18497934937477112, "learning_rate": 7.836784609931536e-06, "loss": 0.4655, "step": 7702 }, { "epoch": 1.7182690162837386, "grad_norm": 0.19526800513267517, "learning_rate": 7.83448696473027e-06, "loss": 0.4419, "step": 7703 }, { "epoch": 1.718492081195628, "grad_norm": 0.1786162108182907, "learning_rate": 7.832189439468734e-06, "loss": 0.4286, "step": 7704 }, { "epoch": 1.7187151461075172, "grad_norm": 0.18116159737110138, "learning_rate": 7.829892034274184e-06, "loss": 0.4799, "step": 7705 }, { "epoch": 1.7189382110194067, "grad_norm": 0.1766580492258072, "learning_rate": 7.827594749273867e-06, "loss": 0.4692, "step": 7706 }, { "epoch": 1.719161275931296, "grad_norm": 0.1848170906305313, "learning_rate": 7.825297584595014e-06, "loss": 0.4547, "step": 7707 }, { "epoch": 1.7193843408431855, "grad_norm": 0.17963701486587524, "learning_rate": 7.823000540364865e-06, "loss": 0.4674, "step": 7708 }, { "epoch": 1.7196074057550748, "grad_norm": 0.183467298746109, "learning_rate": 7.820703616710634e-06, "loss": 0.4579, "step": 7709 }, { "epoch": 1.719830470666964, "grad_norm": 0.17056439816951752, "learning_rate": 7.81840681375955e-06, "loss": 0.4638, "step": 7710 }, { "epoch": 1.7200535355788533, "grad_norm": 0.18178224563598633, "learning_rate": 7.816110131638815e-06, "loss": 0.486, "step": 7711 }, { "epoch": 1.7202766004907428, "grad_norm": 0.20546191930770874, "learning_rate": 7.813813570475645e-06, "loss": 0.4659, "step": 7712 }, { "epoch": 1.7204996654026323, "grad_norm": 0.18711180984973907, "learning_rate": 7.811517130397227e-06, "loss": 0.4554, "step": 7713 }, { "epoch": 1.7207227303145216, "grad_norm": 0.17499269545078278, "learning_rate": 7.809220811530755e-06, "loss": 0.434, "step": 7714 }, { "epoch": 1.720945795226411, "grad_norm": 0.1984056979417801, "learning_rate": 7.806924614003416e-06, "loss": 0.4382, "step": 7715 }, { "epoch": 1.7211688601383002, "grad_norm": 0.20512497425079346, "learning_rate": 7.804628537942386e-06, "loss": 0.4726, "step": 7716 }, { "epoch": 1.7213919250501895, "grad_norm": 0.18379102647304535, "learning_rate": 7.80233258347484e-06, "loss": 0.4779, "step": 7717 }, { "epoch": 1.721614989962079, "grad_norm": 0.17394806444644928, "learning_rate": 7.800036750727935e-06, "loss": 0.4678, "step": 7718 }, { "epoch": 1.7218380548739685, "grad_norm": 0.1689169555902481, "learning_rate": 7.797741039828836e-06, "loss": 0.4272, "step": 7719 }, { "epoch": 1.7220611197858577, "grad_norm": 0.17023077607154846, "learning_rate": 7.79544545090469e-06, "loss": 0.4591, "step": 7720 }, { "epoch": 1.722284184697747, "grad_norm": 0.18222182989120483, "learning_rate": 7.793149984082643e-06, "loss": 0.4452, "step": 7721 }, { "epoch": 1.7225072496096363, "grad_norm": 0.18358302116394043, "learning_rate": 7.79085463948983e-06, "loss": 0.4537, "step": 7722 }, { "epoch": 1.7227303145215258, "grad_norm": 0.1760125756263733, "learning_rate": 7.788559417253386e-06, "loss": 0.4384, "step": 7723 }, { "epoch": 1.722953379433415, "grad_norm": 0.1962590217590332, "learning_rate": 7.786264317500429e-06, "loss": 0.4778, "step": 7724 }, { "epoch": 1.7231764443453046, "grad_norm": 0.18565121293067932, "learning_rate": 7.783969340358081e-06, "loss": 0.4379, "step": 7725 }, { "epoch": 1.7233995092571939, "grad_norm": 0.19673305749893188, "learning_rate": 7.781674485953448e-06, "loss": 0.4361, "step": 7726 }, { "epoch": 1.7236225741690832, "grad_norm": 0.17665378749370575, "learning_rate": 7.779379754413636e-06, "loss": 0.4323, "step": 7727 }, { "epoch": 1.7238456390809724, "grad_norm": 0.16950669884681702, "learning_rate": 7.777085145865744e-06, "loss": 0.4348, "step": 7728 }, { "epoch": 1.724068703992862, "grad_norm": 0.17674678564071655, "learning_rate": 7.774790660436857e-06, "loss": 0.4745, "step": 7729 }, { "epoch": 1.7242917689047514, "grad_norm": 0.183528333902359, "learning_rate": 7.772496298254063e-06, "loss": 0.4611, "step": 7730 }, { "epoch": 1.7245148338166407, "grad_norm": 0.18080073595046997, "learning_rate": 7.770202059444433e-06, "loss": 0.4605, "step": 7731 }, { "epoch": 1.72473789872853, "grad_norm": 0.18171045184135437, "learning_rate": 7.76790794413504e-06, "loss": 0.4527, "step": 7732 }, { "epoch": 1.7249609636404193, "grad_norm": 0.18763373792171478, "learning_rate": 7.765613952452945e-06, "loss": 0.4559, "step": 7733 }, { "epoch": 1.7251840285523086, "grad_norm": 0.1811000555753708, "learning_rate": 7.763320084525205e-06, "loss": 0.4536, "step": 7734 }, { "epoch": 1.725407093464198, "grad_norm": 0.1753537356853485, "learning_rate": 7.761026340478866e-06, "loss": 0.4383, "step": 7735 }, { "epoch": 1.7256301583760876, "grad_norm": 0.1794290989637375, "learning_rate": 7.758732720440976e-06, "loss": 0.4593, "step": 7736 }, { "epoch": 1.7258532232879769, "grad_norm": 0.18469299376010895, "learning_rate": 7.75643922453856e-06, "loss": 0.4239, "step": 7737 }, { "epoch": 1.7260762881998661, "grad_norm": 0.17508934438228607, "learning_rate": 7.754145852898658e-06, "loss": 0.4473, "step": 7738 }, { "epoch": 1.7262993531117554, "grad_norm": 0.18345895409584045, "learning_rate": 7.751852605648283e-06, "loss": 0.45, "step": 7739 }, { "epoch": 1.726522418023645, "grad_norm": 0.18025921285152435, "learning_rate": 7.749559482914453e-06, "loss": 0.4645, "step": 7740 }, { "epoch": 1.7267454829355342, "grad_norm": 0.1723143756389618, "learning_rate": 7.747266484824174e-06, "loss": 0.4087, "step": 7741 }, { "epoch": 1.7269685478474237, "grad_norm": 0.17820636928081512, "learning_rate": 7.744973611504448e-06, "loss": 0.451, "step": 7742 }, { "epoch": 1.727191612759313, "grad_norm": 0.17735761404037476, "learning_rate": 7.742680863082267e-06, "loss": 0.435, "step": 7743 }, { "epoch": 1.7274146776712023, "grad_norm": 0.169492706656456, "learning_rate": 7.74038823968462e-06, "loss": 0.4232, "step": 7744 }, { "epoch": 1.7276377425830916, "grad_norm": 0.17863723635673523, "learning_rate": 7.738095741438485e-06, "loss": 0.4407, "step": 7745 }, { "epoch": 1.727860807494981, "grad_norm": 0.17826594412326813, "learning_rate": 7.735803368470836e-06, "loss": 0.4568, "step": 7746 }, { "epoch": 1.7280838724068706, "grad_norm": 0.18301434814929962, "learning_rate": 7.733511120908639e-06, "loss": 0.471, "step": 7747 }, { "epoch": 1.7283069373187598, "grad_norm": 0.17796590924263, "learning_rate": 7.73121899887885e-06, "loss": 0.4517, "step": 7748 }, { "epoch": 1.7285300022306491, "grad_norm": 0.17701533436775208, "learning_rate": 7.728927002508429e-06, "loss": 0.4672, "step": 7749 }, { "epoch": 1.7287530671425384, "grad_norm": 0.17282108962535858, "learning_rate": 7.726635131924313e-06, "loss": 0.4205, "step": 7750 }, { "epoch": 1.7289761320544277, "grad_norm": 0.17632122337818146, "learning_rate": 7.724343387253446e-06, "loss": 0.4561, "step": 7751 }, { "epoch": 1.7291991969663172, "grad_norm": 0.18215633928775787, "learning_rate": 7.722051768622754e-06, "loss": 0.4757, "step": 7752 }, { "epoch": 1.7294222618782067, "grad_norm": 0.1942843198776245, "learning_rate": 7.719760276159166e-06, "loss": 0.4407, "step": 7753 }, { "epoch": 1.729645326790096, "grad_norm": 0.18691153824329376, "learning_rate": 7.717468909989595e-06, "loss": 0.4524, "step": 7754 }, { "epoch": 1.7298683917019853, "grad_norm": 0.16908486187458038, "learning_rate": 7.715177670240956e-06, "loss": 0.4627, "step": 7755 }, { "epoch": 1.7300914566138745, "grad_norm": 0.1892176866531372, "learning_rate": 7.712886557040147e-06, "loss": 0.4367, "step": 7756 }, { "epoch": 1.730314521525764, "grad_norm": 0.17933063209056854, "learning_rate": 7.71059557051407e-06, "loss": 0.4912, "step": 7757 }, { "epoch": 1.7305375864376533, "grad_norm": 0.17910033464431763, "learning_rate": 7.70830471078961e-06, "loss": 0.4412, "step": 7758 }, { "epoch": 1.7307606513495428, "grad_norm": 0.17961618304252625, "learning_rate": 7.70601397799365e-06, "loss": 0.441, "step": 7759 }, { "epoch": 1.730983716261432, "grad_norm": 0.17368407547473907, "learning_rate": 7.703723372253064e-06, "loss": 0.4236, "step": 7760 }, { "epoch": 1.7312067811733214, "grad_norm": 0.16743780672550201, "learning_rate": 7.701432893694727e-06, "loss": 0.4032, "step": 7761 }, { "epoch": 1.7314298460852107, "grad_norm": 0.17270828783512115, "learning_rate": 7.699142542445489e-06, "loss": 0.4428, "step": 7762 }, { "epoch": 1.7316529109971002, "grad_norm": 0.18314076960086823, "learning_rate": 7.696852318632214e-06, "loss": 0.4728, "step": 7763 }, { "epoch": 1.7318759759089897, "grad_norm": 0.17854903638362885, "learning_rate": 7.694562222381741e-06, "loss": 0.4628, "step": 7764 }, { "epoch": 1.732099040820879, "grad_norm": 0.17335215210914612, "learning_rate": 7.692272253820918e-06, "loss": 0.4492, "step": 7765 }, { "epoch": 1.7323221057327682, "grad_norm": 0.19753512740135193, "learning_rate": 7.68998241307657e-06, "loss": 0.4499, "step": 7766 }, { "epoch": 1.7325451706446575, "grad_norm": 0.17499345541000366, "learning_rate": 7.687692700275532e-06, "loss": 0.4394, "step": 7767 }, { "epoch": 1.7327682355565468, "grad_norm": 0.17762510478496552, "learning_rate": 7.685403115544612e-06, "loss": 0.4656, "step": 7768 }, { "epoch": 1.7329913004684363, "grad_norm": 0.1747395098209381, "learning_rate": 7.683113659010628e-06, "loss": 0.4431, "step": 7769 }, { "epoch": 1.7332143653803258, "grad_norm": 0.1724051684141159, "learning_rate": 7.680824330800384e-06, "loss": 0.4266, "step": 7770 }, { "epoch": 1.733437430292215, "grad_norm": 0.1789834052324295, "learning_rate": 7.678535131040676e-06, "loss": 0.4342, "step": 7771 }, { "epoch": 1.7336604952041044, "grad_norm": 0.18412499129772186, "learning_rate": 7.676246059858297e-06, "loss": 0.4515, "step": 7772 }, { "epoch": 1.7338835601159936, "grad_norm": 0.18302331864833832, "learning_rate": 7.673957117380027e-06, "loss": 0.4316, "step": 7773 }, { "epoch": 1.7341066250278832, "grad_norm": 0.18772312998771667, "learning_rate": 7.671668303732644e-06, "loss": 0.4918, "step": 7774 }, { "epoch": 1.7343296899397724, "grad_norm": 0.21190685033798218, "learning_rate": 7.669379619042915e-06, "loss": 0.4785, "step": 7775 }, { "epoch": 1.734552754851662, "grad_norm": 0.18342965841293335, "learning_rate": 7.667091063437604e-06, "loss": 0.4516, "step": 7776 }, { "epoch": 1.7347758197635512, "grad_norm": 0.1735561341047287, "learning_rate": 7.664802637043463e-06, "loss": 0.426, "step": 7777 }, { "epoch": 1.7349988846754405, "grad_norm": 0.18314534425735474, "learning_rate": 7.662514339987243e-06, "loss": 0.4523, "step": 7778 }, { "epoch": 1.7352219495873298, "grad_norm": 0.18044976890087128, "learning_rate": 7.66022617239568e-06, "loss": 0.4398, "step": 7779 }, { "epoch": 1.7354450144992193, "grad_norm": 0.18999162316322327, "learning_rate": 7.65793813439551e-06, "loss": 0.4582, "step": 7780 }, { "epoch": 1.7356680794111088, "grad_norm": 0.1812235713005066, "learning_rate": 7.655650226113458e-06, "loss": 0.4576, "step": 7781 }, { "epoch": 1.735891144322998, "grad_norm": 0.1805853396654129, "learning_rate": 7.653362447676245e-06, "loss": 0.4331, "step": 7782 }, { "epoch": 1.7361142092348874, "grad_norm": 0.19444364309310913, "learning_rate": 7.651074799210578e-06, "loss": 0.4718, "step": 7783 }, { "epoch": 1.7363372741467766, "grad_norm": 0.17560544610023499, "learning_rate": 7.648787280843167e-06, "loss": 0.4434, "step": 7784 }, { "epoch": 1.736560339058666, "grad_norm": 0.17922593653202057, "learning_rate": 7.646499892700703e-06, "loss": 0.4171, "step": 7785 }, { "epoch": 1.7367834039705554, "grad_norm": 0.17291361093521118, "learning_rate": 7.644212634909881e-06, "loss": 0.4309, "step": 7786 }, { "epoch": 1.737006468882445, "grad_norm": 0.2205720990896225, "learning_rate": 7.641925507597381e-06, "loss": 0.4471, "step": 7787 }, { "epoch": 1.7372295337943342, "grad_norm": 0.2528192698955536, "learning_rate": 7.63963851088988e-06, "loss": 0.463, "step": 7788 }, { "epoch": 1.7374525987062235, "grad_norm": 0.1895085573196411, "learning_rate": 7.637351644914044e-06, "loss": 0.4486, "step": 7789 }, { "epoch": 1.7376756636181128, "grad_norm": 0.17160922288894653, "learning_rate": 7.635064909796534e-06, "loss": 0.4531, "step": 7790 }, { "epoch": 1.7378987285300023, "grad_norm": 0.17404474318027496, "learning_rate": 7.63277830566401e-06, "loss": 0.4446, "step": 7791 }, { "epoch": 1.7381217934418915, "grad_norm": 0.17817366123199463, "learning_rate": 7.63049183264311e-06, "loss": 0.4627, "step": 7792 }, { "epoch": 1.738344858353781, "grad_norm": 0.1933346390724182, "learning_rate": 7.628205490860482e-06, "loss": 0.4744, "step": 7793 }, { "epoch": 1.7385679232656703, "grad_norm": 0.1809963434934616, "learning_rate": 7.625919280442751e-06, "loss": 0.4791, "step": 7794 }, { "epoch": 1.7387909881775596, "grad_norm": 0.31423863768577576, "learning_rate": 7.623633201516542e-06, "loss": 0.4465, "step": 7795 }, { "epoch": 1.739014053089449, "grad_norm": 0.19363394379615784, "learning_rate": 7.621347254208475e-06, "loss": 0.4696, "step": 7796 }, { "epoch": 1.7392371180013384, "grad_norm": 0.18826474249362946, "learning_rate": 7.619061438645159e-06, "loss": 0.4746, "step": 7797 }, { "epoch": 1.739460182913228, "grad_norm": 0.16698677837848663, "learning_rate": 7.616775754953199e-06, "loss": 0.4349, "step": 7798 }, { "epoch": 1.7396832478251172, "grad_norm": 0.17490437626838684, "learning_rate": 7.614490203259186e-06, "loss": 0.4237, "step": 7799 }, { "epoch": 1.7399063127370065, "grad_norm": 0.17656764388084412, "learning_rate": 7.6122047836897125e-06, "loss": 0.4583, "step": 7800 }, { "epoch": 1.7401293776488957, "grad_norm": 0.17693257331848145, "learning_rate": 7.609919496371357e-06, "loss": 0.454, "step": 7801 }, { "epoch": 1.740352442560785, "grad_norm": 0.1721879243850708, "learning_rate": 7.6076343414306965e-06, "loss": 0.4373, "step": 7802 }, { "epoch": 1.7405755074726745, "grad_norm": 0.17377860844135284, "learning_rate": 7.605349318994291e-06, "loss": 0.4456, "step": 7803 }, { "epoch": 1.740798572384564, "grad_norm": 0.17918775975704193, "learning_rate": 7.603064429188707e-06, "loss": 0.4473, "step": 7804 }, { "epoch": 1.7410216372964533, "grad_norm": 0.17350159585475922, "learning_rate": 7.600779672140488e-06, "loss": 0.4505, "step": 7805 }, { "epoch": 1.7412447022083426, "grad_norm": 0.1735851764678955, "learning_rate": 7.5984950479761865e-06, "loss": 0.432, "step": 7806 }, { "epoch": 1.7414677671202319, "grad_norm": 0.19026067852973938, "learning_rate": 7.596210556822332e-06, "loss": 0.4668, "step": 7807 }, { "epoch": 1.7416908320321214, "grad_norm": 0.17454437911510468, "learning_rate": 7.59392619880546e-06, "loss": 0.4583, "step": 7808 }, { "epoch": 1.7419138969440107, "grad_norm": 0.17719118297100067, "learning_rate": 7.591641974052089e-06, "loss": 0.4214, "step": 7809 }, { "epoch": 1.7421369618559002, "grad_norm": 0.17559269070625305, "learning_rate": 7.589357882688735e-06, "loss": 0.4234, "step": 7810 }, { "epoch": 1.7423600267677894, "grad_norm": 0.17429721355438232, "learning_rate": 7.587073924841904e-06, "loss": 0.4205, "step": 7811 }, { "epoch": 1.7425830916796787, "grad_norm": 0.18059833347797394, "learning_rate": 7.584790100638098e-06, "loss": 0.4475, "step": 7812 }, { "epoch": 1.742806156591568, "grad_norm": 0.1845857799053192, "learning_rate": 7.582506410203806e-06, "loss": 0.4408, "step": 7813 }, { "epoch": 1.7430292215034575, "grad_norm": 0.18028376996517181, "learning_rate": 7.580222853665517e-06, "loss": 0.4213, "step": 7814 }, { "epoch": 1.743252286415347, "grad_norm": 0.1828741878271103, "learning_rate": 7.577939431149709e-06, "loss": 0.4559, "step": 7815 }, { "epoch": 1.7434753513272363, "grad_norm": 0.19069290161132812, "learning_rate": 7.575656142782848e-06, "loss": 0.4372, "step": 7816 }, { "epoch": 1.7436984162391256, "grad_norm": 0.1698480099439621, "learning_rate": 7.573372988691402e-06, "loss": 0.431, "step": 7817 }, { "epoch": 1.7439214811510149, "grad_norm": 0.17554740607738495, "learning_rate": 7.5710899690018226e-06, "loss": 0.4536, "step": 7818 }, { "epoch": 1.7441445460629041, "grad_norm": 0.17417636513710022, "learning_rate": 7.568807083840561e-06, "loss": 0.4561, "step": 7819 }, { "epoch": 1.7443676109747936, "grad_norm": 0.1864258497953415, "learning_rate": 7.566524333334053e-06, "loss": 0.4933, "step": 7820 }, { "epoch": 1.7445906758866832, "grad_norm": 0.20237578451633453, "learning_rate": 7.564241717608737e-06, "loss": 0.4429, "step": 7821 }, { "epoch": 1.7448137407985724, "grad_norm": 0.18540610373020172, "learning_rate": 7.5619592367910345e-06, "loss": 0.4572, "step": 7822 }, { "epoch": 1.7450368057104617, "grad_norm": 0.17512571811676025, "learning_rate": 7.5596768910073674e-06, "loss": 0.4408, "step": 7823 }, { "epoch": 1.745259870622351, "grad_norm": 0.19032084941864014, "learning_rate": 7.557394680384142e-06, "loss": 0.4466, "step": 7824 }, { "epoch": 1.7454829355342405, "grad_norm": 0.18703311681747437, "learning_rate": 7.555112605047766e-06, "loss": 0.4681, "step": 7825 }, { "epoch": 1.7457060004461298, "grad_norm": 0.17146390676498413, "learning_rate": 7.55283066512463e-06, "loss": 0.4518, "step": 7826 }, { "epoch": 1.7459290653580193, "grad_norm": 0.17544762790203094, "learning_rate": 7.550548860741128e-06, "loss": 0.4459, "step": 7827 }, { "epoch": 1.7461521302699086, "grad_norm": 0.16931942105293274, "learning_rate": 7.548267192023636e-06, "loss": 0.4317, "step": 7828 }, { "epoch": 1.7463751951817978, "grad_norm": 0.17575553059577942, "learning_rate": 7.545985659098531e-06, "loss": 0.4239, "step": 7829 }, { "epoch": 1.7465982600936871, "grad_norm": 0.1667536497116089, "learning_rate": 7.543704262092174e-06, "loss": 0.4351, "step": 7830 }, { "epoch": 1.7468213250055766, "grad_norm": 0.17519286274909973, "learning_rate": 7.541423001130928e-06, "loss": 0.4405, "step": 7831 }, { "epoch": 1.7470443899174661, "grad_norm": 0.7807138562202454, "learning_rate": 7.5391418763411385e-06, "loss": 0.4395, "step": 7832 }, { "epoch": 1.7472674548293554, "grad_norm": 0.17985686659812927, "learning_rate": 7.536860887849152e-06, "loss": 0.4564, "step": 7833 }, { "epoch": 1.7474905197412447, "grad_norm": 0.18918108940124512, "learning_rate": 7.534580035781304e-06, "loss": 0.4898, "step": 7834 }, { "epoch": 1.747713584653134, "grad_norm": 0.19184869527816772, "learning_rate": 7.532299320263922e-06, "loss": 0.4714, "step": 7835 }, { "epoch": 1.7479366495650233, "grad_norm": 0.18925781548023224, "learning_rate": 7.530018741423328e-06, "loss": 0.4548, "step": 7836 }, { "epoch": 1.7481597144769128, "grad_norm": 0.188811257481575, "learning_rate": 7.5277382993858295e-06, "loss": 0.4444, "step": 7837 }, { "epoch": 1.7483827793888023, "grad_norm": 0.18189990520477295, "learning_rate": 7.525457994277737e-06, "loss": 0.4754, "step": 7838 }, { "epoch": 1.7486058443006915, "grad_norm": 0.179085835814476, "learning_rate": 7.5231778262253456e-06, "loss": 0.4511, "step": 7839 }, { "epoch": 1.7488289092125808, "grad_norm": 0.17552562057971954, "learning_rate": 7.520897795354947e-06, "loss": 0.446, "step": 7840 }, { "epoch": 1.74905197412447, "grad_norm": 0.1869988739490509, "learning_rate": 7.518617901792819e-06, "loss": 0.4591, "step": 7841 }, { "epoch": 1.7492750390363596, "grad_norm": 0.17976555228233337, "learning_rate": 7.516338145665245e-06, "loss": 0.4489, "step": 7842 }, { "epoch": 1.749498103948249, "grad_norm": 0.18475496768951416, "learning_rate": 7.514058527098484e-06, "loss": 0.4781, "step": 7843 }, { "epoch": 1.7497211688601384, "grad_norm": 0.17996063828468323, "learning_rate": 7.5117790462188016e-06, "loss": 0.435, "step": 7844 }, { "epoch": 1.7499442337720277, "grad_norm": 0.1795787513256073, "learning_rate": 7.509499703152445e-06, "loss": 0.4901, "step": 7845 }, { "epoch": 1.750167298683917, "grad_norm": 0.16901874542236328, "learning_rate": 7.507220498025662e-06, "loss": 0.4694, "step": 7846 }, { "epoch": 1.7503903635958062, "grad_norm": 0.16905230283737183, "learning_rate": 7.504941430964687e-06, "loss": 0.4333, "step": 7847 }, { "epoch": 1.7506134285076957, "grad_norm": 0.17756043374538422, "learning_rate": 7.502662502095752e-06, "loss": 0.4535, "step": 7848 }, { "epoch": 1.7508364934195852, "grad_norm": 0.17667444050312042, "learning_rate": 7.500383711545074e-06, "loss": 0.4636, "step": 7849 }, { "epoch": 1.7510595583314745, "grad_norm": 0.18121804296970367, "learning_rate": 7.4981050594388716e-06, "loss": 0.4796, "step": 7850 }, { "epoch": 1.7512826232433638, "grad_norm": 0.18036022782325745, "learning_rate": 7.495826545903347e-06, "loss": 0.4667, "step": 7851 }, { "epoch": 1.751505688155253, "grad_norm": 0.24715840816497803, "learning_rate": 7.4935481710647005e-06, "loss": 0.4713, "step": 7852 }, { "epoch": 1.7517287530671424, "grad_norm": 0.19006216526031494, "learning_rate": 7.491269935049122e-06, "loss": 0.4464, "step": 7853 }, { "epoch": 1.7519518179790319, "grad_norm": 0.18384769558906555, "learning_rate": 7.488991837982794e-06, "loss": 0.4707, "step": 7854 }, { "epoch": 1.7521748828909214, "grad_norm": 0.18586494028568268, "learning_rate": 7.486713879991896e-06, "loss": 0.4731, "step": 7855 }, { "epoch": 1.7523979478028107, "grad_norm": 0.19443517923355103, "learning_rate": 7.484436061202589e-06, "loss": 0.4883, "step": 7856 }, { "epoch": 1.7526210127147, "grad_norm": 0.17276805639266968, "learning_rate": 7.48215838174104e-06, "loss": 0.4449, "step": 7857 }, { "epoch": 1.7528440776265892, "grad_norm": 0.1835545003414154, "learning_rate": 7.4798808417333954e-06, "loss": 0.4748, "step": 7858 }, { "epoch": 1.7530671425384787, "grad_norm": 0.1789509505033493, "learning_rate": 7.477603441305803e-06, "loss": 0.4639, "step": 7859 }, { "epoch": 1.753290207450368, "grad_norm": 0.193350687623024, "learning_rate": 7.475326180584398e-06, "loss": 0.4897, "step": 7860 }, { "epoch": 1.7535132723622575, "grad_norm": 0.17808929085731506, "learning_rate": 7.473049059695311e-06, "loss": 0.4593, "step": 7861 }, { "epoch": 1.7537363372741468, "grad_norm": 0.18136462569236755, "learning_rate": 7.470772078764661e-06, "loss": 0.4693, "step": 7862 }, { "epoch": 1.753959402186036, "grad_norm": 0.1805723011493683, "learning_rate": 7.4684952379185625e-06, "loss": 0.4663, "step": 7863 }, { "epoch": 1.7541824670979254, "grad_norm": 0.1832210123538971, "learning_rate": 7.466218537283122e-06, "loss": 0.4541, "step": 7864 }, { "epoch": 1.7544055320098149, "grad_norm": 0.1848025768995285, "learning_rate": 7.463941976984439e-06, "loss": 0.4507, "step": 7865 }, { "epoch": 1.7546285969217044, "grad_norm": 0.1897166222333908, "learning_rate": 7.461665557148599e-06, "loss": 0.4422, "step": 7866 }, { "epoch": 1.7548516618335936, "grad_norm": 0.1763024479150772, "learning_rate": 7.459389277901689e-06, "loss": 0.4611, "step": 7867 }, { "epoch": 1.755074726745483, "grad_norm": 0.1759800910949707, "learning_rate": 7.4571131393697795e-06, "loss": 0.4406, "step": 7868 }, { "epoch": 1.7552977916573722, "grad_norm": 0.17623275518417358, "learning_rate": 7.454837141678942e-06, "loss": 0.4643, "step": 7869 }, { "epoch": 1.7555208565692615, "grad_norm": 0.1801615208387375, "learning_rate": 7.452561284955231e-06, "loss": 0.4649, "step": 7870 }, { "epoch": 1.755743921481151, "grad_norm": 0.1817091703414917, "learning_rate": 7.450285569324703e-06, "loss": 0.4531, "step": 7871 }, { "epoch": 1.7559669863930405, "grad_norm": 0.18977338075637817, "learning_rate": 7.448009994913396e-06, "loss": 0.4476, "step": 7872 }, { "epoch": 1.7561900513049298, "grad_norm": 0.17964302003383636, "learning_rate": 7.445734561847351e-06, "loss": 0.4365, "step": 7873 }, { "epoch": 1.756413116216819, "grad_norm": 0.17655393481254578, "learning_rate": 7.44345927025259e-06, "loss": 0.4412, "step": 7874 }, { "epoch": 1.7566361811287083, "grad_norm": 0.1832321286201477, "learning_rate": 7.441184120255141e-06, "loss": 0.4589, "step": 7875 }, { "epoch": 1.7568592460405978, "grad_norm": 0.18122322857379913, "learning_rate": 7.438909111981008e-06, "loss": 0.4363, "step": 7876 }, { "epoch": 1.7570823109524871, "grad_norm": 0.19032621383666992, "learning_rate": 7.436634245556195e-06, "loss": 0.454, "step": 7877 }, { "epoch": 1.7573053758643766, "grad_norm": 0.1851622313261032, "learning_rate": 7.4343595211067045e-06, "loss": 0.4839, "step": 7878 }, { "epoch": 1.757528440776266, "grad_norm": 0.2012101113796234, "learning_rate": 7.432084938758521e-06, "loss": 0.4918, "step": 7879 }, { "epoch": 1.7577515056881552, "grad_norm": 0.18474964797496796, "learning_rate": 7.429810498637627e-06, "loss": 0.4552, "step": 7880 }, { "epoch": 1.7579745706000445, "grad_norm": 0.17664113640785217, "learning_rate": 7.427536200869996e-06, "loss": 0.4456, "step": 7881 }, { "epoch": 1.758197635511934, "grad_norm": 0.18062026798725128, "learning_rate": 7.425262045581589e-06, "loss": 0.456, "step": 7882 }, { "epoch": 1.7584207004238235, "grad_norm": 0.18045106530189514, "learning_rate": 7.42298803289837e-06, "loss": 0.4886, "step": 7883 }, { "epoch": 1.7586437653357128, "grad_norm": 0.17931503057479858, "learning_rate": 7.420714162946278e-06, "loss": 0.4453, "step": 7884 }, { "epoch": 1.758866830247602, "grad_norm": 0.1748034656047821, "learning_rate": 7.418440435851265e-06, "loss": 0.4626, "step": 7885 }, { "epoch": 1.7590898951594913, "grad_norm": 0.17426064610481262, "learning_rate": 7.416166851739257e-06, "loss": 0.4369, "step": 7886 }, { "epoch": 1.7593129600713808, "grad_norm": 0.17878130078315735, "learning_rate": 7.4138934107361825e-06, "loss": 0.4624, "step": 7887 }, { "epoch": 1.75953602498327, "grad_norm": 0.1841915249824524, "learning_rate": 7.411620112967957e-06, "loss": 0.4625, "step": 7888 }, { "epoch": 1.7597590898951596, "grad_norm": 0.18047453463077545, "learning_rate": 7.4093469585604925e-06, "loss": 0.4383, "step": 7889 }, { "epoch": 1.759982154807049, "grad_norm": 0.1770285815000534, "learning_rate": 7.407073947639688e-06, "loss": 0.4267, "step": 7890 }, { "epoch": 1.7602052197189382, "grad_norm": 0.17349712550640106, "learning_rate": 7.404801080331441e-06, "loss": 0.4404, "step": 7891 }, { "epoch": 1.7604282846308275, "grad_norm": 0.17680755257606506, "learning_rate": 7.4025283567616315e-06, "loss": 0.4272, "step": 7892 }, { "epoch": 1.760651349542717, "grad_norm": 0.18388980627059937, "learning_rate": 7.400255777056142e-06, "loss": 0.4677, "step": 7893 }, { "epoch": 1.7608744144546062, "grad_norm": 0.18694987893104553, "learning_rate": 7.39798334134084e-06, "loss": 0.4669, "step": 7894 }, { "epoch": 1.7610974793664957, "grad_norm": 0.16978652775287628, "learning_rate": 7.395711049741588e-06, "loss": 0.4435, "step": 7895 }, { "epoch": 1.761320544278385, "grad_norm": 0.17169198393821716, "learning_rate": 7.3934389023842405e-06, "loss": 0.4306, "step": 7896 }, { "epoch": 1.7615436091902743, "grad_norm": 0.18931961059570312, "learning_rate": 7.391166899394642e-06, "loss": 0.4597, "step": 7897 }, { "epoch": 1.7617666741021636, "grad_norm": 0.17674033343791962, "learning_rate": 7.3888950408986294e-06, "loss": 0.4626, "step": 7898 }, { "epoch": 1.761989739014053, "grad_norm": 0.18383964896202087, "learning_rate": 7.386623327022034e-06, "loss": 0.4635, "step": 7899 }, { "epoch": 1.7622128039259426, "grad_norm": 0.1803615838289261, "learning_rate": 7.38435175789068e-06, "loss": 0.4811, "step": 7900 }, { "epoch": 1.7624358688378319, "grad_norm": 0.19607679545879364, "learning_rate": 7.382080333630377e-06, "loss": 0.4715, "step": 7901 }, { "epoch": 1.7626589337497212, "grad_norm": 0.17395561933517456, "learning_rate": 7.379809054366933e-06, "loss": 0.4515, "step": 7902 }, { "epoch": 1.7628819986616104, "grad_norm": 0.1800202876329422, "learning_rate": 7.3775379202261455e-06, "loss": 0.4933, "step": 7903 }, { "epoch": 1.7631050635735, "grad_norm": 0.1730235368013382, "learning_rate": 7.375266931333805e-06, "loss": 0.4634, "step": 7904 }, { "epoch": 1.7633281284853892, "grad_norm": 0.1844315528869629, "learning_rate": 7.37299608781569e-06, "loss": 0.456, "step": 7905 }, { "epoch": 1.7635511933972787, "grad_norm": 0.17907127737998962, "learning_rate": 7.370725389797577e-06, "loss": 0.451, "step": 7906 }, { "epoch": 1.763774258309168, "grad_norm": 0.18444164097309113, "learning_rate": 7.368454837405229e-06, "loss": 0.4647, "step": 7907 }, { "epoch": 1.7639973232210573, "grad_norm": 0.1789025217294693, "learning_rate": 7.366184430764407e-06, "loss": 0.4542, "step": 7908 }, { "epoch": 1.7642203881329466, "grad_norm": 0.17742502689361572, "learning_rate": 7.363914170000858e-06, "loss": 0.4419, "step": 7909 }, { "epoch": 1.764443453044836, "grad_norm": 0.18832780420780182, "learning_rate": 7.361644055240325e-06, "loss": 0.4763, "step": 7910 }, { "epoch": 1.7646665179567254, "grad_norm": 0.2054174244403839, "learning_rate": 7.359374086608539e-06, "loss": 0.4543, "step": 7911 }, { "epoch": 1.7648895828686149, "grad_norm": 0.17511680722236633, "learning_rate": 7.357104264231228e-06, "loss": 0.4377, "step": 7912 }, { "epoch": 1.7651126477805041, "grad_norm": 0.1764538437128067, "learning_rate": 7.354834588234105e-06, "loss": 0.4675, "step": 7913 }, { "epoch": 1.7653357126923934, "grad_norm": 0.17360688745975494, "learning_rate": 7.352565058742882e-06, "loss": 0.4409, "step": 7914 }, { "epoch": 1.7655587776042827, "grad_norm": 0.1800130158662796, "learning_rate": 7.350295675883259e-06, "loss": 0.466, "step": 7915 }, { "epoch": 1.7657818425161722, "grad_norm": 0.1828777939081192, "learning_rate": 7.34802643978093e-06, "loss": 0.4603, "step": 7916 }, { "epoch": 1.7660049074280617, "grad_norm": 0.180416077375412, "learning_rate": 7.345757350561577e-06, "loss": 0.4442, "step": 7917 }, { "epoch": 1.766227972339951, "grad_norm": 0.18395313620567322, "learning_rate": 7.343488408350879e-06, "loss": 0.4801, "step": 7918 }, { "epoch": 1.7664510372518403, "grad_norm": 0.1887856274843216, "learning_rate": 7.3412196132745005e-06, "loss": 0.4421, "step": 7919 }, { "epoch": 1.7666741021637296, "grad_norm": 0.17162606120109558, "learning_rate": 7.3389509654581045e-06, "loss": 0.4293, "step": 7920 }, { "epoch": 1.766897167075619, "grad_norm": 0.18713156878948212, "learning_rate": 7.336682465027346e-06, "loss": 0.4521, "step": 7921 }, { "epoch": 1.7671202319875083, "grad_norm": 0.1808861941099167, "learning_rate": 7.334414112107863e-06, "loss": 0.4461, "step": 7922 }, { "epoch": 1.7673432968993978, "grad_norm": 0.1840147227048874, "learning_rate": 7.3321459068252955e-06, "loss": 0.4606, "step": 7923 }, { "epoch": 1.7675663618112871, "grad_norm": 0.22466281056404114, "learning_rate": 7.329877849305267e-06, "loss": 0.4342, "step": 7924 }, { "epoch": 1.7677894267231764, "grad_norm": 0.18192514777183533, "learning_rate": 7.327609939673401e-06, "loss": 0.4638, "step": 7925 }, { "epoch": 1.7680124916350657, "grad_norm": 0.1804400235414505, "learning_rate": 7.325342178055304e-06, "loss": 0.4558, "step": 7926 }, { "epoch": 1.7682355565469552, "grad_norm": 0.1800002157688141, "learning_rate": 7.3230745645765845e-06, "loss": 0.4303, "step": 7927 }, { "epoch": 1.7684586214588445, "grad_norm": 0.17886100709438324, "learning_rate": 7.320807099362831e-06, "loss": 0.4507, "step": 7928 }, { "epoch": 1.768681686370734, "grad_norm": 0.1851029098033905, "learning_rate": 7.318539782539635e-06, "loss": 0.4693, "step": 7929 }, { "epoch": 1.7689047512826233, "grad_norm": 0.19121414422988892, "learning_rate": 7.316272614232572e-06, "loss": 0.4465, "step": 7930 }, { "epoch": 1.7691278161945125, "grad_norm": 0.18079820275306702, "learning_rate": 7.314005594567215e-06, "loss": 0.4475, "step": 7931 }, { "epoch": 1.7693508811064018, "grad_norm": 0.1832534670829773, "learning_rate": 7.311738723669122e-06, "loss": 0.4585, "step": 7932 }, { "epoch": 1.7695739460182913, "grad_norm": 0.18712462484836578, "learning_rate": 7.309472001663849e-06, "loss": 0.4578, "step": 7933 }, { "epoch": 1.7697970109301808, "grad_norm": 0.17979365587234497, "learning_rate": 7.307205428676939e-06, "loss": 0.4711, "step": 7934 }, { "epoch": 1.77002007584207, "grad_norm": 0.19163386523723602, "learning_rate": 7.304939004833933e-06, "loss": 0.432, "step": 7935 }, { "epoch": 1.7702431407539594, "grad_norm": 0.20467031002044678, "learning_rate": 7.302672730260356e-06, "loss": 0.4606, "step": 7936 }, { "epoch": 1.7704662056658487, "grad_norm": 0.1881827414035797, "learning_rate": 7.300406605081732e-06, "loss": 0.459, "step": 7937 }, { "epoch": 1.7706892705777382, "grad_norm": 0.18381071090698242, "learning_rate": 7.298140629423568e-06, "loss": 0.4804, "step": 7938 }, { "epoch": 1.7709123354896275, "grad_norm": 0.18901976943016052, "learning_rate": 7.295874803411373e-06, "loss": 0.4568, "step": 7939 }, { "epoch": 1.771135400401517, "grad_norm": 0.17536242306232452, "learning_rate": 7.293609127170642e-06, "loss": 0.4217, "step": 7940 }, { "epoch": 1.7713584653134062, "grad_norm": 0.17019738256931305, "learning_rate": 7.291343600826859e-06, "loss": 0.4405, "step": 7941 }, { "epoch": 1.7715815302252955, "grad_norm": 0.1769004762172699, "learning_rate": 7.2890782245055084e-06, "loss": 0.461, "step": 7942 }, { "epoch": 1.7718045951371848, "grad_norm": 0.18277987837791443, "learning_rate": 7.286812998332056e-06, "loss": 0.4691, "step": 7943 }, { "epoch": 1.7720276600490743, "grad_norm": 0.18873664736747742, "learning_rate": 7.2845479224319685e-06, "loss": 0.4606, "step": 7944 }, { "epoch": 1.7722507249609636, "grad_norm": 0.1791076958179474, "learning_rate": 7.2822829969306955e-06, "loss": 0.4369, "step": 7945 }, { "epoch": 1.772473789872853, "grad_norm": 0.17617996037006378, "learning_rate": 7.280018221953686e-06, "loss": 0.4538, "step": 7946 }, { "epoch": 1.7726968547847424, "grad_norm": 0.1820133924484253, "learning_rate": 7.277753597626378e-06, "loss": 0.4444, "step": 7947 }, { "epoch": 1.7729199196966317, "grad_norm": 0.16976666450500488, "learning_rate": 7.275489124074198e-06, "loss": 0.4415, "step": 7948 }, { "epoch": 1.773142984608521, "grad_norm": 0.19467420876026154, "learning_rate": 7.273224801422568e-06, "loss": 0.4527, "step": 7949 }, { "epoch": 1.7733660495204104, "grad_norm": 0.18623554706573486, "learning_rate": 7.270960629796903e-06, "loss": 0.4552, "step": 7950 }, { "epoch": 1.7735891144323, "grad_norm": 0.1663247048854828, "learning_rate": 7.2686966093226e-06, "loss": 0.4154, "step": 7951 }, { "epoch": 1.7738121793441892, "grad_norm": 0.18195995688438416, "learning_rate": 7.266432740125065e-06, "loss": 0.4501, "step": 7952 }, { "epoch": 1.7740352442560785, "grad_norm": 0.18873853981494904, "learning_rate": 7.264169022329676e-06, "loss": 0.4529, "step": 7953 }, { "epoch": 1.7742583091679678, "grad_norm": 0.1879783272743225, "learning_rate": 7.2619054560618175e-06, "loss": 0.4949, "step": 7954 }, { "epoch": 1.7744813740798573, "grad_norm": 0.17232050001621246, "learning_rate": 7.259642041446856e-06, "loss": 0.4307, "step": 7955 }, { "epoch": 1.7747044389917466, "grad_norm": 0.18015244603157043, "learning_rate": 7.25737877861016e-06, "loss": 0.4438, "step": 7956 }, { "epoch": 1.774927503903636, "grad_norm": 0.17779506742954254, "learning_rate": 7.255115667677077e-06, "loss": 0.4488, "step": 7957 }, { "epoch": 1.7751505688155254, "grad_norm": 0.17906124889850616, "learning_rate": 7.252852708772953e-06, "loss": 0.4606, "step": 7958 }, { "epoch": 1.7753736337274146, "grad_norm": 0.18236404657363892, "learning_rate": 7.250589902023127e-06, "loss": 0.452, "step": 7959 }, { "epoch": 1.775596698639304, "grad_norm": 0.17182929813861847, "learning_rate": 7.248327247552925e-06, "loss": 0.4517, "step": 7960 }, { "epoch": 1.7758197635511934, "grad_norm": 0.18052874505519867, "learning_rate": 7.246064745487671e-06, "loss": 0.4603, "step": 7961 }, { "epoch": 1.776042828463083, "grad_norm": 0.18056614696979523, "learning_rate": 7.243802395952673e-06, "loss": 0.4641, "step": 7962 }, { "epoch": 1.7762658933749722, "grad_norm": 0.17786100506782532, "learning_rate": 7.241540199073235e-06, "loss": 0.4782, "step": 7963 }, { "epoch": 1.7764889582868615, "grad_norm": 0.17451272904872894, "learning_rate": 7.239278154974654e-06, "loss": 0.4126, "step": 7964 }, { "epoch": 1.7767120231987508, "grad_norm": 0.1730719357728958, "learning_rate": 7.237016263782213e-06, "loss": 0.4368, "step": 7965 }, { "epoch": 1.77693508811064, "grad_norm": 0.173664852976799, "learning_rate": 7.234754525621192e-06, "loss": 0.4113, "step": 7966 }, { "epoch": 1.7771581530225296, "grad_norm": 0.17867377400398254, "learning_rate": 7.232492940616858e-06, "loss": 0.4555, "step": 7967 }, { "epoch": 1.777381217934419, "grad_norm": 0.1810644119977951, "learning_rate": 7.2302315088944755e-06, "loss": 0.458, "step": 7968 }, { "epoch": 1.7776042828463083, "grad_norm": 0.18765312433242798, "learning_rate": 7.227970230579292e-06, "loss": 0.4574, "step": 7969 }, { "epoch": 1.7778273477581976, "grad_norm": 0.1742631047964096, "learning_rate": 7.225709105796557e-06, "loss": 0.4651, "step": 7970 }, { "epoch": 1.778050412670087, "grad_norm": 0.22400738298892975, "learning_rate": 7.223448134671499e-06, "loss": 0.4561, "step": 7971 }, { "epoch": 1.7782734775819764, "grad_norm": 0.18479710817337036, "learning_rate": 7.221187317329353e-06, "loss": 0.4447, "step": 7972 }, { "epoch": 1.7784965424938657, "grad_norm": 0.17623472213745117, "learning_rate": 7.218926653895329e-06, "loss": 0.44, "step": 7973 }, { "epoch": 1.7787196074057552, "grad_norm": 0.18266162276268005, "learning_rate": 7.216666144494642e-06, "loss": 0.4708, "step": 7974 }, { "epoch": 1.7789426723176445, "grad_norm": 0.1808854639530182, "learning_rate": 7.21440578925249e-06, "loss": 0.4403, "step": 7975 }, { "epoch": 1.7791657372295338, "grad_norm": 0.1751621812582016, "learning_rate": 7.212145588294071e-06, "loss": 0.4669, "step": 7976 }, { "epoch": 1.779388802141423, "grad_norm": 0.18863114714622498, "learning_rate": 7.2098855417445625e-06, "loss": 0.4674, "step": 7977 }, { "epoch": 1.7796118670533125, "grad_norm": 0.17852959036827087, "learning_rate": 7.207625649729145e-06, "loss": 0.4389, "step": 7978 }, { "epoch": 1.779834931965202, "grad_norm": 0.18438269197940826, "learning_rate": 7.205365912372982e-06, "loss": 0.4467, "step": 7979 }, { "epoch": 1.7800579968770913, "grad_norm": 0.179841086268425, "learning_rate": 7.203106329801236e-06, "loss": 0.4554, "step": 7980 }, { "epoch": 1.7802810617889806, "grad_norm": 0.1838013380765915, "learning_rate": 7.200846902139053e-06, "loss": 0.4643, "step": 7981 }, { "epoch": 1.7805041267008699, "grad_norm": 0.17783623933792114, "learning_rate": 7.198587629511577e-06, "loss": 0.4615, "step": 7982 }, { "epoch": 1.7807271916127592, "grad_norm": 0.18178191781044006, "learning_rate": 7.196328512043938e-06, "loss": 0.4285, "step": 7983 }, { "epoch": 1.7809502565246487, "grad_norm": 0.1845524162054062, "learning_rate": 7.194069549861262e-06, "loss": 0.4441, "step": 7984 }, { "epoch": 1.7811733214365382, "grad_norm": 0.17717888951301575, "learning_rate": 7.191810743088667e-06, "loss": 0.465, "step": 7985 }, { "epoch": 1.7813963863484275, "grad_norm": 0.1790652871131897, "learning_rate": 7.189552091851254e-06, "loss": 0.4652, "step": 7986 }, { "epoch": 1.7816194512603167, "grad_norm": 0.17189590632915497, "learning_rate": 7.187293596274128e-06, "loss": 0.4241, "step": 7987 }, { "epoch": 1.781842516172206, "grad_norm": 0.16961240768432617, "learning_rate": 7.1850352564823715e-06, "loss": 0.4425, "step": 7988 }, { "epoch": 1.7820655810840955, "grad_norm": 0.17109279334545135, "learning_rate": 7.182777072601072e-06, "loss": 0.4479, "step": 7989 }, { "epoch": 1.7822886459959848, "grad_norm": 0.17438296973705292, "learning_rate": 7.180519044755298e-06, "loss": 0.4439, "step": 7990 }, { "epoch": 1.7825117109078743, "grad_norm": 0.17713730037212372, "learning_rate": 7.178261173070116e-06, "loss": 0.4643, "step": 7991 }, { "epoch": 1.7827347758197636, "grad_norm": 0.18717728555202484, "learning_rate": 7.176003457670577e-06, "loss": 0.4673, "step": 7992 }, { "epoch": 1.7829578407316529, "grad_norm": 0.18204233050346375, "learning_rate": 7.173745898681732e-06, "loss": 0.4605, "step": 7993 }, { "epoch": 1.7831809056435421, "grad_norm": 0.18667717278003693, "learning_rate": 7.171488496228617e-06, "loss": 0.484, "step": 7994 }, { "epoch": 1.7834039705554317, "grad_norm": 0.18115851283073425, "learning_rate": 7.169231250436261e-06, "loss": 0.4749, "step": 7995 }, { "epoch": 1.7836270354673212, "grad_norm": 0.17957910895347595, "learning_rate": 7.166974161429684e-06, "loss": 0.4673, "step": 7996 }, { "epoch": 1.7838501003792104, "grad_norm": 0.17228113114833832, "learning_rate": 7.1647172293338995e-06, "loss": 0.4658, "step": 7997 }, { "epoch": 1.7840731652910997, "grad_norm": 0.17893186211585999, "learning_rate": 7.162460454273909e-06, "loss": 0.4639, "step": 7998 }, { "epoch": 1.784296230202989, "grad_norm": 0.17566993832588196, "learning_rate": 7.160203836374708e-06, "loss": 0.4567, "step": 7999 }, { "epoch": 1.7845192951148783, "grad_norm": 0.17004132270812988, "learning_rate": 7.1579473757612795e-06, "loss": 0.4419, "step": 8000 }, { "epoch": 1.7847423600267678, "grad_norm": 0.17692558467388153, "learning_rate": 7.155691072558604e-06, "loss": 0.4546, "step": 8001 }, { "epoch": 1.7849654249386573, "grad_norm": 0.1739339381456375, "learning_rate": 7.153434926891648e-06, "loss": 0.4636, "step": 8002 }, { "epoch": 1.7851884898505466, "grad_norm": 0.17398999631404877, "learning_rate": 7.151178938885373e-06, "loss": 0.4291, "step": 8003 }, { "epoch": 1.7854115547624358, "grad_norm": 0.1721528023481369, "learning_rate": 7.148923108664725e-06, "loss": 0.4325, "step": 8004 }, { "epoch": 1.7856346196743251, "grad_norm": 0.18906241655349731, "learning_rate": 7.146667436354651e-06, "loss": 0.4211, "step": 8005 }, { "epoch": 1.7858576845862146, "grad_norm": 0.18445979058742523, "learning_rate": 7.144411922080082e-06, "loss": 0.4536, "step": 8006 }, { "epoch": 1.786080749498104, "grad_norm": 0.18223892152309418, "learning_rate": 7.142156565965943e-06, "loss": 0.4554, "step": 8007 }, { "epoch": 1.7863038144099934, "grad_norm": 0.18098188936710358, "learning_rate": 7.139901368137152e-06, "loss": 0.4453, "step": 8008 }, { "epoch": 1.7865268793218827, "grad_norm": 0.17738774418830872, "learning_rate": 7.1376463287186105e-06, "loss": 0.4302, "step": 8009 }, { "epoch": 1.786749944233772, "grad_norm": 0.18761973083019257, "learning_rate": 7.135391447835224e-06, "loss": 0.4673, "step": 8010 }, { "epoch": 1.7869730091456613, "grad_norm": 0.17969244718551636, "learning_rate": 7.133136725611876e-06, "loss": 0.4278, "step": 8011 }, { "epoch": 1.7871960740575508, "grad_norm": 0.19217950105667114, "learning_rate": 7.13088216217345e-06, "loss": 0.4448, "step": 8012 }, { "epoch": 1.7874191389694403, "grad_norm": 0.1748211681842804, "learning_rate": 7.128627757644816e-06, "loss": 0.4345, "step": 8013 }, { "epoch": 1.7876422038813296, "grad_norm": 0.1840067058801651, "learning_rate": 7.126373512150842e-06, "loss": 0.4478, "step": 8014 }, { "epoch": 1.7878652687932188, "grad_norm": 0.1767268180847168, "learning_rate": 7.1241194258163735e-06, "loss": 0.4493, "step": 8015 }, { "epoch": 1.7880883337051081, "grad_norm": 0.17844226956367493, "learning_rate": 7.121865498766265e-06, "loss": 0.4131, "step": 8016 }, { "epoch": 1.7883113986169974, "grad_norm": 0.22129550576210022, "learning_rate": 7.119611731125349e-06, "loss": 0.4441, "step": 8017 }, { "epoch": 1.788534463528887, "grad_norm": 0.18443824350833893, "learning_rate": 7.117358123018454e-06, "loss": 0.4344, "step": 8018 }, { "epoch": 1.7887575284407764, "grad_norm": 0.17921538650989532, "learning_rate": 7.115104674570397e-06, "loss": 0.4646, "step": 8019 }, { "epoch": 1.7889805933526657, "grad_norm": 0.1854918748140335, "learning_rate": 7.11285138590599e-06, "loss": 0.4692, "step": 8020 }, { "epoch": 1.789203658264555, "grad_norm": 0.1828143298625946, "learning_rate": 7.110598257150034e-06, "loss": 0.444, "step": 8021 }, { "epoch": 1.7894267231764442, "grad_norm": 0.18005596101284027, "learning_rate": 7.108345288427324e-06, "loss": 0.4447, "step": 8022 }, { "epoch": 1.7896497880883337, "grad_norm": 0.18779170513153076, "learning_rate": 7.106092479862639e-06, "loss": 0.4516, "step": 8023 }, { "epoch": 1.789872853000223, "grad_norm": 0.17747437953948975, "learning_rate": 7.103839831580754e-06, "loss": 0.4436, "step": 8024 }, { "epoch": 1.7900959179121125, "grad_norm": 0.1890641450881958, "learning_rate": 7.101587343706441e-06, "loss": 0.4624, "step": 8025 }, { "epoch": 1.7903189828240018, "grad_norm": 0.18202100694179535, "learning_rate": 7.099335016364449e-06, "loss": 0.4821, "step": 8026 }, { "epoch": 1.790542047735891, "grad_norm": 0.17150594294071198, "learning_rate": 7.097082849679533e-06, "loss": 0.444, "step": 8027 }, { "epoch": 1.7907651126477804, "grad_norm": 0.18223629891872406, "learning_rate": 7.0948308437764255e-06, "loss": 0.4514, "step": 8028 }, { "epoch": 1.7909881775596699, "grad_norm": 0.1873582899570465, "learning_rate": 7.092578998779864e-06, "loss": 0.4614, "step": 8029 }, { "epoch": 1.7912112424715594, "grad_norm": 0.17026644945144653, "learning_rate": 7.090327314814562e-06, "loss": 0.4616, "step": 8030 }, { "epoch": 1.7914343073834487, "grad_norm": 0.1856604516506195, "learning_rate": 7.088075792005239e-06, "loss": 0.4666, "step": 8031 }, { "epoch": 1.791657372295338, "grad_norm": 0.18316306173801422, "learning_rate": 7.085824430476593e-06, "loss": 0.4768, "step": 8032 }, { "epoch": 1.7918804372072272, "grad_norm": 0.16916830837726593, "learning_rate": 7.083573230353324e-06, "loss": 0.4437, "step": 8033 }, { "epoch": 1.7921035021191165, "grad_norm": 0.17978118360042572, "learning_rate": 7.0813221917601104e-06, "loss": 0.4525, "step": 8034 }, { "epoch": 1.792326567031006, "grad_norm": 0.19063124060630798, "learning_rate": 7.079071314821637e-06, "loss": 0.4366, "step": 8035 }, { "epoch": 1.7925496319428955, "grad_norm": 0.1843474954366684, "learning_rate": 7.076820599662564e-06, "loss": 0.4635, "step": 8036 }, { "epoch": 1.7927726968547848, "grad_norm": 0.17682790756225586, "learning_rate": 7.074570046407556e-06, "loss": 0.4645, "step": 8037 }, { "epoch": 1.792995761766674, "grad_norm": 0.19309797883033752, "learning_rate": 7.072319655181263e-06, "loss": 0.4527, "step": 8038 }, { "epoch": 1.7932188266785634, "grad_norm": 0.17107698321342468, "learning_rate": 7.07006942610832e-06, "loss": 0.4349, "step": 8039 }, { "epoch": 1.7934418915904529, "grad_norm": 0.18476001918315887, "learning_rate": 7.067819359313364e-06, "loss": 0.4645, "step": 8040 }, { "epoch": 1.7936649565023421, "grad_norm": 0.17997145652770996, "learning_rate": 7.065569454921013e-06, "loss": 0.4634, "step": 8041 }, { "epoch": 1.7938880214142316, "grad_norm": 0.17692801356315613, "learning_rate": 7.063319713055887e-06, "loss": 0.4336, "step": 8042 }, { "epoch": 1.794111086326121, "grad_norm": 0.17240415513515472, "learning_rate": 7.061070133842584e-06, "loss": 0.4452, "step": 8043 }, { "epoch": 1.7943341512380102, "grad_norm": 0.16578581929206848, "learning_rate": 7.058820717405707e-06, "loss": 0.4297, "step": 8044 }, { "epoch": 1.7945572161498995, "grad_norm": 0.1810625195503235, "learning_rate": 7.056571463869838e-06, "loss": 0.4628, "step": 8045 }, { "epoch": 1.794780281061789, "grad_norm": 0.1759740114212036, "learning_rate": 7.054322373359557e-06, "loss": 0.4383, "step": 8046 }, { "epoch": 1.7950033459736785, "grad_norm": 0.18540038168430328, "learning_rate": 7.052073445999431e-06, "loss": 0.4655, "step": 8047 }, { "epoch": 1.7952264108855678, "grad_norm": 0.17017890512943268, "learning_rate": 7.04982468191402e-06, "loss": 0.4216, "step": 8048 }, { "epoch": 1.795449475797457, "grad_norm": 0.17637605965137482, "learning_rate": 7.047576081227878e-06, "loss": 0.463, "step": 8049 }, { "epoch": 1.7956725407093463, "grad_norm": 0.1885131597518921, "learning_rate": 7.045327644065542e-06, "loss": 0.4563, "step": 8050 }, { "epoch": 1.7958956056212356, "grad_norm": 0.18475908041000366, "learning_rate": 7.043079370551549e-06, "loss": 0.4511, "step": 8051 }, { "epoch": 1.7961186705331251, "grad_norm": 0.19659362733364105, "learning_rate": 7.0408312608104164e-06, "loss": 0.4794, "step": 8052 }, { "epoch": 1.7963417354450146, "grad_norm": 0.1826995611190796, "learning_rate": 7.038583314966666e-06, "loss": 0.4628, "step": 8053 }, { "epoch": 1.796564800356904, "grad_norm": 0.1809930056333542, "learning_rate": 7.036335533144798e-06, "loss": 0.4272, "step": 8054 }, { "epoch": 1.7967878652687932, "grad_norm": 0.177769273519516, "learning_rate": 7.034087915469312e-06, "loss": 0.4509, "step": 8055 }, { "epoch": 1.7970109301806825, "grad_norm": 0.17363698780536652, "learning_rate": 7.031840462064691e-06, "loss": 0.4564, "step": 8056 }, { "epoch": 1.797233995092572, "grad_norm": 0.17677956819534302, "learning_rate": 7.029593173055418e-06, "loss": 0.4327, "step": 8057 }, { "epoch": 1.7974570600044613, "grad_norm": 0.1727108657360077, "learning_rate": 7.0273460485659576e-06, "loss": 0.4229, "step": 8058 }, { "epoch": 1.7976801249163508, "grad_norm": 0.1822299361228943, "learning_rate": 7.025099088720773e-06, "loss": 0.4397, "step": 8059 }, { "epoch": 1.79790318982824, "grad_norm": 0.18957071006298065, "learning_rate": 7.022852293644313e-06, "loss": 0.4679, "step": 8060 }, { "epoch": 1.7981262547401293, "grad_norm": 0.17512580752372742, "learning_rate": 7.020605663461022e-06, "loss": 0.4267, "step": 8061 }, { "epoch": 1.7983493196520186, "grad_norm": 0.1905566304922104, "learning_rate": 7.018359198295328e-06, "loss": 0.4542, "step": 8062 }, { "epoch": 1.798572384563908, "grad_norm": 0.1788644641637802, "learning_rate": 7.016112898271658e-06, "loss": 0.4609, "step": 8063 }, { "epoch": 1.7987954494757976, "grad_norm": 0.1741524040699005, "learning_rate": 7.013866763514426e-06, "loss": 0.439, "step": 8064 }, { "epoch": 1.799018514387687, "grad_norm": 0.169100821018219, "learning_rate": 7.011620794148035e-06, "loss": 0.4511, "step": 8065 }, { "epoch": 1.7992415792995762, "grad_norm": 0.1780511438846588, "learning_rate": 7.009374990296882e-06, "loss": 0.4391, "step": 8066 }, { "epoch": 1.7994646442114655, "grad_norm": 0.17674995958805084, "learning_rate": 7.0071293520853555e-06, "loss": 0.4662, "step": 8067 }, { "epoch": 1.7996877091233547, "grad_norm": 0.17258092761039734, "learning_rate": 7.004883879637829e-06, "loss": 0.4235, "step": 8068 }, { "epoch": 1.7999107740352442, "grad_norm": 0.17394836246967316, "learning_rate": 7.002638573078673e-06, "loss": 0.4728, "step": 8069 }, { "epoch": 1.8001338389471337, "grad_norm": 0.17654062807559967, "learning_rate": 7.000393432532249e-06, "loss": 0.4386, "step": 8070 }, { "epoch": 1.800356903859023, "grad_norm": 0.17876291275024414, "learning_rate": 6.998148458122903e-06, "loss": 0.4621, "step": 8071 }, { "epoch": 1.8005799687709123, "grad_norm": 0.18045787513256073, "learning_rate": 6.99590364997498e-06, "loss": 0.4521, "step": 8072 }, { "epoch": 1.8008030336828016, "grad_norm": 0.1908344030380249, "learning_rate": 6.993659008212808e-06, "loss": 0.4671, "step": 8073 }, { "epoch": 1.801026098594691, "grad_norm": 0.18054337799549103, "learning_rate": 6.991414532960712e-06, "loss": 0.4476, "step": 8074 }, { "epoch": 1.8012491635065804, "grad_norm": 0.18316704034805298, "learning_rate": 6.989170224343003e-06, "loss": 0.4653, "step": 8075 }, { "epoch": 1.8014722284184699, "grad_norm": 0.17818012833595276, "learning_rate": 6.986926082483986e-06, "loss": 0.4487, "step": 8076 }, { "epoch": 1.8016952933303592, "grad_norm": 0.17305941879749298, "learning_rate": 6.9846821075079544e-06, "loss": 0.4331, "step": 8077 }, { "epoch": 1.8019183582422484, "grad_norm": 0.17837919294834137, "learning_rate": 6.982438299539197e-06, "loss": 0.433, "step": 8078 }, { "epoch": 1.8021414231541377, "grad_norm": 0.198192298412323, "learning_rate": 6.980194658701985e-06, "loss": 0.4623, "step": 8079 }, { "epoch": 1.8023644880660272, "grad_norm": 0.18458157777786255, "learning_rate": 6.977951185120591e-06, "loss": 0.4642, "step": 8080 }, { "epoch": 1.8025875529779167, "grad_norm": 0.17324891686439514, "learning_rate": 6.975707878919268e-06, "loss": 0.4549, "step": 8081 }, { "epoch": 1.802810617889806, "grad_norm": 0.1909075230360031, "learning_rate": 6.973464740222268e-06, "loss": 0.4647, "step": 8082 }, { "epoch": 1.8030336828016953, "grad_norm": 0.17792390286922455, "learning_rate": 6.971221769153826e-06, "loss": 0.4563, "step": 8083 }, { "epoch": 1.8032567477135846, "grad_norm": 0.17481090128421783, "learning_rate": 6.968978965838176e-06, "loss": 0.4537, "step": 8084 }, { "epoch": 1.8034798126254739, "grad_norm": 0.19158580899238586, "learning_rate": 6.966736330399535e-06, "loss": 0.4493, "step": 8085 }, { "epoch": 1.8037028775373634, "grad_norm": 0.18006612360477448, "learning_rate": 6.964493862962118e-06, "loss": 0.4838, "step": 8086 }, { "epoch": 1.8039259424492529, "grad_norm": 0.18528711795806885, "learning_rate": 6.962251563650124e-06, "loss": 0.4397, "step": 8087 }, { "epoch": 1.8041490073611421, "grad_norm": 0.17403841018676758, "learning_rate": 6.960009432587747e-06, "loss": 0.4219, "step": 8088 }, { "epoch": 1.8043720722730314, "grad_norm": 0.17622390389442444, "learning_rate": 6.957767469899168e-06, "loss": 0.4645, "step": 8089 }, { "epoch": 1.8045951371849207, "grad_norm": 0.17650552093982697, "learning_rate": 6.955525675708564e-06, "loss": 0.4427, "step": 8090 }, { "epoch": 1.8048182020968102, "grad_norm": 0.1795479953289032, "learning_rate": 6.953284050140101e-06, "loss": 0.4268, "step": 8091 }, { "epoch": 1.8050412670086995, "grad_norm": 0.18363459408283234, "learning_rate": 6.951042593317929e-06, "loss": 0.4403, "step": 8092 }, { "epoch": 1.805264331920589, "grad_norm": 0.18579071760177612, "learning_rate": 6.948801305366199e-06, "loss": 0.4556, "step": 8093 }, { "epoch": 1.8054873968324783, "grad_norm": 0.1805351972579956, "learning_rate": 6.946560186409042e-06, "loss": 0.4356, "step": 8094 }, { "epoch": 1.8057104617443676, "grad_norm": 0.18544495105743408, "learning_rate": 6.944319236570593e-06, "loss": 0.46, "step": 8095 }, { "epoch": 1.8059335266562568, "grad_norm": 0.1721157431602478, "learning_rate": 6.942078455974963e-06, "loss": 0.4562, "step": 8096 }, { "epoch": 1.8061565915681463, "grad_norm": 0.1732361763715744, "learning_rate": 6.9398378447462645e-06, "loss": 0.4537, "step": 8097 }, { "epoch": 1.8063796564800358, "grad_norm": 0.1871383637189865, "learning_rate": 6.937597403008594e-06, "loss": 0.4808, "step": 8098 }, { "epoch": 1.8066027213919251, "grad_norm": 0.17318753898143768, "learning_rate": 6.9353571308860446e-06, "loss": 0.4451, "step": 8099 }, { "epoch": 1.8068257863038144, "grad_norm": 0.17707256972789764, "learning_rate": 6.933117028502693e-06, "loss": 0.4374, "step": 8100 }, { "epoch": 1.8070488512157037, "grad_norm": 0.18298032879829407, "learning_rate": 6.930877095982613e-06, "loss": 0.4588, "step": 8101 }, { "epoch": 1.807271916127593, "grad_norm": 0.18046192824840546, "learning_rate": 6.928637333449863e-06, "loss": 0.4731, "step": 8102 }, { "epoch": 1.8074949810394825, "grad_norm": 0.16831883788108826, "learning_rate": 6.9263977410285014e-06, "loss": 0.4308, "step": 8103 }, { "epoch": 1.807718045951372, "grad_norm": 0.17589864134788513, "learning_rate": 6.924158318842562e-06, "loss": 0.4372, "step": 8104 }, { "epoch": 1.8079411108632613, "grad_norm": 0.1834905594587326, "learning_rate": 6.9219190670160856e-06, "loss": 0.477, "step": 8105 }, { "epoch": 1.8081641757751505, "grad_norm": 0.18303707242012024, "learning_rate": 6.919679985673091e-06, "loss": 0.4603, "step": 8106 }, { "epoch": 1.8083872406870398, "grad_norm": 0.1804521679878235, "learning_rate": 6.917441074937597e-06, "loss": 0.4504, "step": 8107 }, { "epoch": 1.8086103055989293, "grad_norm": 0.19304148852825165, "learning_rate": 6.915202334933604e-06, "loss": 0.4768, "step": 8108 }, { "epoch": 1.8088333705108186, "grad_norm": 0.1749470978975296, "learning_rate": 6.912963765785111e-06, "loss": 0.449, "step": 8109 }, { "epoch": 1.809056435422708, "grad_norm": 0.18221113085746765, "learning_rate": 6.910725367616102e-06, "loss": 0.454, "step": 8110 }, { "epoch": 1.8092795003345974, "grad_norm": 0.1730881929397583, "learning_rate": 6.908487140550555e-06, "loss": 0.4506, "step": 8111 }, { "epoch": 1.8095025652464867, "grad_norm": 0.17381657660007477, "learning_rate": 6.9062490847124375e-06, "loss": 0.4499, "step": 8112 }, { "epoch": 1.809725630158376, "grad_norm": 0.17647477984428406, "learning_rate": 6.904011200225704e-06, "loss": 0.4394, "step": 8113 }, { "epoch": 1.8099486950702655, "grad_norm": 0.17345494031906128, "learning_rate": 6.901773487214308e-06, "loss": 0.463, "step": 8114 }, { "epoch": 1.810171759982155, "grad_norm": 0.178610160946846, "learning_rate": 6.899535945802182e-06, "loss": 0.4519, "step": 8115 }, { "epoch": 1.8103948248940442, "grad_norm": 0.17938841879367828, "learning_rate": 6.89729857611326e-06, "loss": 0.4326, "step": 8116 }, { "epoch": 1.8106178898059335, "grad_norm": 0.19250169396400452, "learning_rate": 6.895061378271457e-06, "loss": 0.4666, "step": 8117 }, { "epoch": 1.8108409547178228, "grad_norm": 0.17775999009609222, "learning_rate": 6.892824352400689e-06, "loss": 0.4465, "step": 8118 }, { "epoch": 1.811064019629712, "grad_norm": 0.1746588945388794, "learning_rate": 6.890587498624854e-06, "loss": 0.4471, "step": 8119 }, { "epoch": 1.8112870845416016, "grad_norm": 0.2063416689634323, "learning_rate": 6.88835081706784e-06, "loss": 0.4341, "step": 8120 }, { "epoch": 1.811510149453491, "grad_norm": 0.1776486337184906, "learning_rate": 6.8861143078535334e-06, "loss": 0.4288, "step": 8121 }, { "epoch": 1.8117332143653804, "grad_norm": 0.1845918595790863, "learning_rate": 6.883877971105799e-06, "loss": 0.4622, "step": 8122 }, { "epoch": 1.8119562792772697, "grad_norm": 0.174449160695076, "learning_rate": 6.881641806948507e-06, "loss": 0.4518, "step": 8123 }, { "epoch": 1.812179344189159, "grad_norm": 0.16784648597240448, "learning_rate": 6.879405815505504e-06, "loss": 0.4349, "step": 8124 }, { "epoch": 1.8124024091010484, "grad_norm": 0.19099272787570953, "learning_rate": 6.877169996900639e-06, "loss": 0.4694, "step": 8125 }, { "epoch": 1.8126254740129377, "grad_norm": 0.17356911301612854, "learning_rate": 6.874934351257742e-06, "loss": 0.4486, "step": 8126 }, { "epoch": 1.8128485389248272, "grad_norm": 0.1857564002275467, "learning_rate": 6.872698878700639e-06, "loss": 0.4687, "step": 8127 }, { "epoch": 1.8130716038367165, "grad_norm": 0.18476644158363342, "learning_rate": 6.870463579353141e-06, "loss": 0.4673, "step": 8128 }, { "epoch": 1.8132946687486058, "grad_norm": 0.17950499057769775, "learning_rate": 6.868228453339058e-06, "loss": 0.439, "step": 8129 }, { "epoch": 1.813517733660495, "grad_norm": 0.17879453301429749, "learning_rate": 6.865993500782182e-06, "loss": 0.461, "step": 8130 }, { "epoch": 1.8137407985723846, "grad_norm": 0.1743541806936264, "learning_rate": 6.8637587218063e-06, "loss": 0.4458, "step": 8131 }, { "epoch": 1.813963863484274, "grad_norm": 0.18763816356658936, "learning_rate": 6.861524116535187e-06, "loss": 0.4482, "step": 8132 }, { "epoch": 1.8141869283961634, "grad_norm": 0.17765846848487854, "learning_rate": 6.85928968509261e-06, "loss": 0.4387, "step": 8133 }, { "epoch": 1.8144099933080526, "grad_norm": 0.1837596446275711, "learning_rate": 6.857055427602327e-06, "loss": 0.4789, "step": 8134 }, { "epoch": 1.814633058219942, "grad_norm": 0.18249496817588806, "learning_rate": 6.854821344188083e-06, "loss": 0.4511, "step": 8135 }, { "epoch": 1.8148561231318312, "grad_norm": 0.17417484521865845, "learning_rate": 6.852587434973619e-06, "loss": 0.4265, "step": 8136 }, { "epoch": 1.8150791880437207, "grad_norm": 0.17669354379177094, "learning_rate": 6.850353700082659e-06, "loss": 0.4448, "step": 8137 }, { "epoch": 1.8153022529556102, "grad_norm": 0.17180798947811127, "learning_rate": 6.8481201396389254e-06, "loss": 0.4516, "step": 8138 }, { "epoch": 1.8155253178674995, "grad_norm": 0.17706137895584106, "learning_rate": 6.845886753766122e-06, "loss": 0.4411, "step": 8139 }, { "epoch": 1.8157483827793888, "grad_norm": 0.18320493400096893, "learning_rate": 6.843653542587952e-06, "loss": 0.4633, "step": 8140 }, { "epoch": 1.815971447691278, "grad_norm": 0.1703965961933136, "learning_rate": 6.8414205062281025e-06, "loss": 0.435, "step": 8141 }, { "epoch": 1.8161945126031676, "grad_norm": 0.2011895328760147, "learning_rate": 6.839187644810255e-06, "loss": 0.4653, "step": 8142 }, { "epoch": 1.8164175775150568, "grad_norm": 0.17818677425384521, "learning_rate": 6.836954958458075e-06, "loss": 0.4564, "step": 8143 }, { "epoch": 1.8166406424269463, "grad_norm": 0.17748653888702393, "learning_rate": 6.834722447295228e-06, "loss": 0.4357, "step": 8144 }, { "epoch": 1.8168637073388356, "grad_norm": 0.1691962629556656, "learning_rate": 6.832490111445361e-06, "loss": 0.4411, "step": 8145 }, { "epoch": 1.817086772250725, "grad_norm": 0.1772109568119049, "learning_rate": 6.830257951032118e-06, "loss": 0.4365, "step": 8146 }, { "epoch": 1.8173098371626142, "grad_norm": 0.17656628787517548, "learning_rate": 6.828025966179126e-06, "loss": 0.4387, "step": 8147 }, { "epoch": 1.8175329020745037, "grad_norm": 0.19161885976791382, "learning_rate": 6.825794157010011e-06, "loss": 0.4518, "step": 8148 }, { "epoch": 1.8177559669863932, "grad_norm": 0.18089298903942108, "learning_rate": 6.823562523648379e-06, "loss": 0.4414, "step": 8149 }, { "epoch": 1.8179790318982825, "grad_norm": 0.1826842874288559, "learning_rate": 6.8213310662178375e-06, "loss": 0.4651, "step": 8150 }, { "epoch": 1.8182020968101718, "grad_norm": 0.1827784776687622, "learning_rate": 6.819099784841974e-06, "loss": 0.4793, "step": 8151 }, { "epoch": 1.818425161722061, "grad_norm": 0.193526491522789, "learning_rate": 6.816868679644375e-06, "loss": 0.4758, "step": 8152 }, { "epoch": 1.8186482266339503, "grad_norm": 0.18154491484165192, "learning_rate": 6.814637750748609e-06, "loss": 0.4674, "step": 8153 }, { "epoch": 1.8188712915458398, "grad_norm": 0.18000057339668274, "learning_rate": 6.812406998278242e-06, "loss": 0.4513, "step": 8154 }, { "epoch": 1.8190943564577293, "grad_norm": 0.17618589103221893, "learning_rate": 6.8101764223568286e-06, "loss": 0.4684, "step": 8155 }, { "epoch": 1.8193174213696186, "grad_norm": 0.1722210943698883, "learning_rate": 6.807946023107906e-06, "loss": 0.4327, "step": 8156 }, { "epoch": 1.8195404862815079, "grad_norm": 0.18736113607883453, "learning_rate": 6.805715800655015e-06, "loss": 0.4647, "step": 8157 }, { "epoch": 1.8197635511933972, "grad_norm": 0.17818038165569305, "learning_rate": 6.803485755121672e-06, "loss": 0.4543, "step": 8158 }, { "epoch": 1.8199866161052867, "grad_norm": 0.18235214054584503, "learning_rate": 6.801255886631399e-06, "loss": 0.4679, "step": 8159 }, { "epoch": 1.820209681017176, "grad_norm": 0.1716981828212738, "learning_rate": 6.799026195307693e-06, "loss": 0.4515, "step": 8160 }, { "epoch": 1.8204327459290655, "grad_norm": 0.17725342512130737, "learning_rate": 6.796796681274054e-06, "loss": 0.4501, "step": 8161 }, { "epoch": 1.8206558108409547, "grad_norm": 0.18533247709274292, "learning_rate": 6.79456734465396e-06, "loss": 0.4674, "step": 8162 }, { "epoch": 1.820878875752844, "grad_norm": 0.17556942999362946, "learning_rate": 6.792338185570893e-06, "loss": 0.4552, "step": 8163 }, { "epoch": 1.8211019406647333, "grad_norm": 0.1827983409166336, "learning_rate": 6.790109204148311e-06, "loss": 0.435, "step": 8164 }, { "epoch": 1.8213250055766228, "grad_norm": 0.18367482721805573, "learning_rate": 6.787880400509674e-06, "loss": 0.4447, "step": 8165 }, { "epoch": 1.8215480704885123, "grad_norm": 0.17707587778568268, "learning_rate": 6.785651774778425e-06, "loss": 0.4555, "step": 8166 }, { "epoch": 1.8217711354004016, "grad_norm": 0.17791809141635895, "learning_rate": 6.7834233270780005e-06, "loss": 0.4766, "step": 8167 }, { "epoch": 1.8219942003122909, "grad_norm": 0.1814098060131073, "learning_rate": 6.781195057531824e-06, "loss": 0.4634, "step": 8168 }, { "epoch": 1.8222172652241801, "grad_norm": 0.1738249808549881, "learning_rate": 6.778966966263314e-06, "loss": 0.4515, "step": 8169 }, { "epoch": 1.8224403301360694, "grad_norm": 0.17479370534420013, "learning_rate": 6.7767390533958735e-06, "loss": 0.4376, "step": 8170 }, { "epoch": 1.822663395047959, "grad_norm": 0.17913582921028137, "learning_rate": 6.774511319052899e-06, "loss": 0.4571, "step": 8171 }, { "epoch": 1.8228864599598484, "grad_norm": 0.1840064972639084, "learning_rate": 6.7722837633577766e-06, "loss": 0.4191, "step": 8172 }, { "epoch": 1.8231095248717377, "grad_norm": 0.1903713196516037, "learning_rate": 6.770056386433884e-06, "loss": 0.4352, "step": 8173 }, { "epoch": 1.823332589783627, "grad_norm": 0.18376924097537994, "learning_rate": 6.767829188404585e-06, "loss": 0.4649, "step": 8174 }, { "epoch": 1.8235556546955163, "grad_norm": 0.17748649418354034, "learning_rate": 6.765602169393235e-06, "loss": 0.4321, "step": 8175 }, { "epoch": 1.8237787196074058, "grad_norm": 0.19591280817985535, "learning_rate": 6.763375329523185e-06, "loss": 0.4656, "step": 8176 }, { "epoch": 1.824001784519295, "grad_norm": 0.1707315593957901, "learning_rate": 6.761148668917766e-06, "loss": 0.4413, "step": 8177 }, { "epoch": 1.8242248494311846, "grad_norm": 0.17590689659118652, "learning_rate": 6.758922187700309e-06, "loss": 0.4682, "step": 8178 }, { "epoch": 1.8244479143430739, "grad_norm": 0.17584408819675446, "learning_rate": 6.756695885994126e-06, "loss": 0.4626, "step": 8179 }, { "epoch": 1.8246709792549631, "grad_norm": 0.1750144511461258, "learning_rate": 6.754469763922529e-06, "loss": 0.4217, "step": 8180 }, { "epoch": 1.8248940441668524, "grad_norm": 0.18190471827983856, "learning_rate": 6.752243821608808e-06, "loss": 0.448, "step": 8181 }, { "epoch": 1.825117109078742, "grad_norm": 0.1814090758562088, "learning_rate": 6.7500180591762575e-06, "loss": 0.442, "step": 8182 }, { "epoch": 1.8253401739906314, "grad_norm": 0.18113404512405396, "learning_rate": 6.747792476748147e-06, "loss": 0.4522, "step": 8183 }, { "epoch": 1.8255632389025207, "grad_norm": 0.1728588193655014, "learning_rate": 6.7455670744477484e-06, "loss": 0.4394, "step": 8184 }, { "epoch": 1.82578630381441, "grad_norm": 0.17083577811717987, "learning_rate": 6.743341852398315e-06, "loss": 0.4432, "step": 8185 }, { "epoch": 1.8260093687262993, "grad_norm": 0.17789053916931152, "learning_rate": 6.741116810723096e-06, "loss": 0.4453, "step": 8186 }, { "epoch": 1.8262324336381888, "grad_norm": 0.18853406608104706, "learning_rate": 6.738891949545325e-06, "loss": 0.4398, "step": 8187 }, { "epoch": 1.826455498550078, "grad_norm": 0.1743546426296234, "learning_rate": 6.736667268988235e-06, "loss": 0.4292, "step": 8188 }, { "epoch": 1.8266785634619676, "grad_norm": 0.18301378190517426, "learning_rate": 6.734442769175036e-06, "loss": 0.4604, "step": 8189 }, { "epoch": 1.8269016283738568, "grad_norm": 0.18097054958343506, "learning_rate": 6.732218450228938e-06, "loss": 0.4585, "step": 8190 }, { "epoch": 1.8271246932857461, "grad_norm": 0.17369534075260162, "learning_rate": 6.729994312273137e-06, "loss": 0.4206, "step": 8191 }, { "epoch": 1.8273477581976354, "grad_norm": 0.19749490916728973, "learning_rate": 6.727770355430822e-06, "loss": 0.4724, "step": 8192 }, { "epoch": 1.827570823109525, "grad_norm": 0.17227788269519806, "learning_rate": 6.725546579825165e-06, "loss": 0.4397, "step": 8193 }, { "epoch": 1.8277938880214142, "grad_norm": 0.17980121076107025, "learning_rate": 6.723322985579338e-06, "loss": 0.4568, "step": 8194 }, { "epoch": 1.8280169529333037, "grad_norm": 0.1813877820968628, "learning_rate": 6.721099572816494e-06, "loss": 0.4348, "step": 8195 }, { "epoch": 1.828240017845193, "grad_norm": 0.1828600913286209, "learning_rate": 6.718876341659779e-06, "loss": 0.4685, "step": 8196 }, { "epoch": 1.8284630827570822, "grad_norm": 0.175664484500885, "learning_rate": 6.716653292232334e-06, "loss": 0.4359, "step": 8197 }, { "epoch": 1.8286861476689715, "grad_norm": 0.18827195465564728, "learning_rate": 6.714430424657281e-06, "loss": 0.4545, "step": 8198 }, { "epoch": 1.828909212580861, "grad_norm": 0.18291041254997253, "learning_rate": 6.7122077390577415e-06, "loss": 0.455, "step": 8199 }, { "epoch": 1.8291322774927505, "grad_norm": 0.17457644641399384, "learning_rate": 6.709985235556819e-06, "loss": 0.4355, "step": 8200 }, { "epoch": 1.8293553424046398, "grad_norm": 0.19204942882061005, "learning_rate": 6.707762914277604e-06, "loss": 0.4779, "step": 8201 }, { "epoch": 1.829578407316529, "grad_norm": 0.17864084243774414, "learning_rate": 6.705540775343192e-06, "loss": 0.4301, "step": 8202 }, { "epoch": 1.8298014722284184, "grad_norm": 0.17506073415279388, "learning_rate": 6.703318818876652e-06, "loss": 0.4389, "step": 8203 }, { "epoch": 1.8300245371403079, "grad_norm": 0.1823255568742752, "learning_rate": 6.701097045001055e-06, "loss": 0.4644, "step": 8204 }, { "epoch": 1.8302476020521972, "grad_norm": 0.192154660820961, "learning_rate": 6.698875453839453e-06, "loss": 0.447, "step": 8205 }, { "epoch": 1.8304706669640867, "grad_norm": 0.18675589561462402, "learning_rate": 6.696654045514894e-06, "loss": 0.4493, "step": 8206 }, { "epoch": 1.830693731875976, "grad_norm": 0.193002849817276, "learning_rate": 6.694432820150412e-06, "loss": 0.4124, "step": 8207 }, { "epoch": 1.8309167967878652, "grad_norm": 0.1812351495027542, "learning_rate": 6.692211777869033e-06, "loss": 0.4555, "step": 8208 }, { "epoch": 1.8311398616997545, "grad_norm": 0.18915650248527527, "learning_rate": 6.689990918793773e-06, "loss": 0.4678, "step": 8209 }, { "epoch": 1.831362926611644, "grad_norm": 0.18118512630462646, "learning_rate": 6.6877702430476374e-06, "loss": 0.4425, "step": 8210 }, { "epoch": 1.8315859915235333, "grad_norm": 0.17849402129650116, "learning_rate": 6.68554975075362e-06, "loss": 0.453, "step": 8211 }, { "epoch": 1.8318090564354228, "grad_norm": 0.17353329062461853, "learning_rate": 6.683329442034707e-06, "loss": 0.4514, "step": 8212 }, { "epoch": 1.832032121347312, "grad_norm": 0.1685749590396881, "learning_rate": 6.681109317013869e-06, "loss": 0.4358, "step": 8213 }, { "epoch": 1.8322551862592014, "grad_norm": 0.17803844809532166, "learning_rate": 6.678889375814077e-06, "loss": 0.4495, "step": 8214 }, { "epoch": 1.8324782511710906, "grad_norm": 0.18683882057666779, "learning_rate": 6.67666961855828e-06, "loss": 0.4501, "step": 8215 }, { "epoch": 1.8327013160829801, "grad_norm": 0.18386538326740265, "learning_rate": 6.674450045369427e-06, "loss": 0.4349, "step": 8216 }, { "epoch": 1.8329243809948697, "grad_norm": 0.20062412321567535, "learning_rate": 6.672230656370446e-06, "loss": 0.4646, "step": 8217 }, { "epoch": 1.833147445906759, "grad_norm": 0.17725883424282074, "learning_rate": 6.670011451684266e-06, "loss": 0.452, "step": 8218 }, { "epoch": 1.8333705108186482, "grad_norm": 0.16746068000793457, "learning_rate": 6.667792431433796e-06, "loss": 0.4277, "step": 8219 }, { "epoch": 1.8335935757305375, "grad_norm": 0.18574824929237366, "learning_rate": 6.665573595741941e-06, "loss": 0.4381, "step": 8220 }, { "epoch": 1.833816640642427, "grad_norm": 0.18854008615016937, "learning_rate": 6.663354944731598e-06, "loss": 0.4505, "step": 8221 }, { "epoch": 1.8340397055543163, "grad_norm": 0.18151527643203735, "learning_rate": 6.661136478525644e-06, "loss": 0.4744, "step": 8222 }, { "epoch": 1.8342627704662058, "grad_norm": 0.17463651299476624, "learning_rate": 6.6589181972469556e-06, "loss": 0.4667, "step": 8223 }, { "epoch": 1.834485835378095, "grad_norm": 0.17236928641796112, "learning_rate": 6.656700101018392e-06, "loss": 0.4546, "step": 8224 }, { "epoch": 1.8347089002899843, "grad_norm": 0.1812690645456314, "learning_rate": 6.65448218996281e-06, "loss": 0.4311, "step": 8225 }, { "epoch": 1.8349319652018736, "grad_norm": 0.2300224006175995, "learning_rate": 6.6522644642030445e-06, "loss": 0.4551, "step": 8226 }, { "epoch": 1.8351550301137631, "grad_norm": 0.17556323111057281, "learning_rate": 6.650046923861935e-06, "loss": 0.4333, "step": 8227 }, { "epoch": 1.8353780950256524, "grad_norm": 0.17821857333183289, "learning_rate": 6.647829569062295e-06, "loss": 0.4486, "step": 8228 }, { "epoch": 1.835601159937542, "grad_norm": 0.23370222747325897, "learning_rate": 6.645612399926941e-06, "loss": 0.4446, "step": 8229 }, { "epoch": 1.8358242248494312, "grad_norm": 0.1826113909482956, "learning_rate": 6.643395416578673e-06, "loss": 0.4362, "step": 8230 }, { "epoch": 1.8360472897613205, "grad_norm": 0.1840929388999939, "learning_rate": 6.641178619140282e-06, "loss": 0.4662, "step": 8231 }, { "epoch": 1.8362703546732098, "grad_norm": 0.19260503351688385, "learning_rate": 6.638962007734544e-06, "loss": 0.4709, "step": 8232 }, { "epoch": 1.8364934195850993, "grad_norm": 0.17979590594768524, "learning_rate": 6.636745582484234e-06, "loss": 0.4509, "step": 8233 }, { "epoch": 1.8367164844969888, "grad_norm": 0.17919979989528656, "learning_rate": 6.634529343512107e-06, "loss": 0.4582, "step": 8234 }, { "epoch": 1.836939549408878, "grad_norm": 0.1846793293952942, "learning_rate": 6.632313290940917e-06, "loss": 0.4356, "step": 8235 }, { "epoch": 1.8371626143207673, "grad_norm": 0.18610712885856628, "learning_rate": 6.630097424893398e-06, "loss": 0.4555, "step": 8236 }, { "epoch": 1.8373856792326566, "grad_norm": 0.17965011298656464, "learning_rate": 6.6278817454922835e-06, "loss": 0.4494, "step": 8237 }, { "epoch": 1.8376087441445461, "grad_norm": 0.1982518583536148, "learning_rate": 6.625666252860288e-06, "loss": 0.4644, "step": 8238 }, { "epoch": 1.8378318090564354, "grad_norm": 0.18403403460979462, "learning_rate": 6.6234509471201205e-06, "loss": 0.4501, "step": 8239 }, { "epoch": 1.838054873968325, "grad_norm": 0.17158350348472595, "learning_rate": 6.621235828394481e-06, "loss": 0.4386, "step": 8240 }, { "epoch": 1.8382779388802142, "grad_norm": 0.1909009963274002, "learning_rate": 6.619020896806052e-06, "loss": 0.4464, "step": 8241 }, { "epoch": 1.8385010037921035, "grad_norm": 0.18062900006771088, "learning_rate": 6.616806152477515e-06, "loss": 0.4657, "step": 8242 }, { "epoch": 1.8387240687039927, "grad_norm": 0.1820714771747589, "learning_rate": 6.614591595531533e-06, "loss": 0.4723, "step": 8243 }, { "epoch": 1.8389471336158822, "grad_norm": 0.18476656079292297, "learning_rate": 6.612377226090764e-06, "loss": 0.4457, "step": 8244 }, { "epoch": 1.8391701985277715, "grad_norm": 0.17424450814723969, "learning_rate": 6.610163044277853e-06, "loss": 0.444, "step": 8245 }, { "epoch": 1.839393263439661, "grad_norm": 0.19445692002773285, "learning_rate": 6.607949050215438e-06, "loss": 0.4446, "step": 8246 }, { "epoch": 1.8396163283515503, "grad_norm": 0.18586407601833344, "learning_rate": 6.605735244026138e-06, "loss": 0.4495, "step": 8247 }, { "epoch": 1.8398393932634396, "grad_norm": 0.17516399919986725, "learning_rate": 6.6035216258325745e-06, "loss": 0.4261, "step": 8248 }, { "epoch": 1.8400624581753289, "grad_norm": 0.17480303347110748, "learning_rate": 6.601308195757343e-06, "loss": 0.4283, "step": 8249 }, { "epoch": 1.8402855230872184, "grad_norm": 0.1745281219482422, "learning_rate": 6.599094953923046e-06, "loss": 0.4384, "step": 8250 }, { "epoch": 1.8405085879991079, "grad_norm": 0.18595942854881287, "learning_rate": 6.5968819004522625e-06, "loss": 0.4519, "step": 8251 }, { "epoch": 1.8407316529109972, "grad_norm": 0.17765408754348755, "learning_rate": 6.594669035467567e-06, "loss": 0.4463, "step": 8252 }, { "epoch": 1.8409547178228864, "grad_norm": 0.17620229721069336, "learning_rate": 6.592456359091517e-06, "loss": 0.4545, "step": 8253 }, { "epoch": 1.8411777827347757, "grad_norm": 0.18198491632938385, "learning_rate": 6.5902438714466706e-06, "loss": 0.4706, "step": 8254 }, { "epoch": 1.8414008476466652, "grad_norm": 0.1813773214817047, "learning_rate": 6.588031572655566e-06, "loss": 0.4435, "step": 8255 }, { "epoch": 1.8416239125585545, "grad_norm": 0.1799456626176834, "learning_rate": 6.585819462840737e-06, "loss": 0.4108, "step": 8256 }, { "epoch": 1.841846977470444, "grad_norm": 0.18853728473186493, "learning_rate": 6.583607542124699e-06, "loss": 0.4527, "step": 8257 }, { "epoch": 1.8420700423823333, "grad_norm": 0.1802626997232437, "learning_rate": 6.581395810629969e-06, "loss": 0.4413, "step": 8258 }, { "epoch": 1.8422931072942226, "grad_norm": 0.18371166288852692, "learning_rate": 6.57918426847904e-06, "loss": 0.4408, "step": 8259 }, { "epoch": 1.8425161722061119, "grad_norm": 0.17911924421787262, "learning_rate": 6.576972915794404e-06, "loss": 0.444, "step": 8260 }, { "epoch": 1.8427392371180014, "grad_norm": 0.1887340396642685, "learning_rate": 6.574761752698542e-06, "loss": 0.4522, "step": 8261 }, { "epoch": 1.8429623020298909, "grad_norm": 0.2037588208913803, "learning_rate": 6.572550779313916e-06, "loss": 0.4419, "step": 8262 }, { "epoch": 1.8431853669417801, "grad_norm": 0.17989180982112885, "learning_rate": 6.570339995762991e-06, "loss": 0.4453, "step": 8263 }, { "epoch": 1.8434084318536694, "grad_norm": 0.17799720168113708, "learning_rate": 6.5681294021682084e-06, "loss": 0.4333, "step": 8264 }, { "epoch": 1.8436314967655587, "grad_norm": 0.17597989737987518, "learning_rate": 6.5659189986520085e-06, "loss": 0.4441, "step": 8265 }, { "epoch": 1.843854561677448, "grad_norm": 0.1881999969482422, "learning_rate": 6.563708785336813e-06, "loss": 0.4674, "step": 8266 }, { "epoch": 1.8440776265893375, "grad_norm": 0.18646754324436188, "learning_rate": 6.561498762345044e-06, "loss": 0.4533, "step": 8267 }, { "epoch": 1.844300691501227, "grad_norm": 0.1898210197687149, "learning_rate": 6.559288929799099e-06, "loss": 0.4494, "step": 8268 }, { "epoch": 1.8445237564131163, "grad_norm": 0.1805214136838913, "learning_rate": 6.557079287821378e-06, "loss": 0.4533, "step": 8269 }, { "epoch": 1.8447468213250056, "grad_norm": 0.17660482227802277, "learning_rate": 6.554869836534261e-06, "loss": 0.4237, "step": 8270 }, { "epoch": 1.8449698862368948, "grad_norm": 0.1824430525302887, "learning_rate": 6.552660576060126e-06, "loss": 0.4767, "step": 8271 }, { "epoch": 1.8451929511487843, "grad_norm": 0.1871567666530609, "learning_rate": 6.55045150652133e-06, "loss": 0.4576, "step": 8272 }, { "epoch": 1.8454160160606736, "grad_norm": 0.17943458259105682, "learning_rate": 6.548242628040231e-06, "loss": 0.4475, "step": 8273 }, { "epoch": 1.8456390809725631, "grad_norm": 0.1877557635307312, "learning_rate": 6.5460339407391645e-06, "loss": 0.4419, "step": 8274 }, { "epoch": 1.8458621458844524, "grad_norm": 0.17813464999198914, "learning_rate": 6.543825444740468e-06, "loss": 0.4647, "step": 8275 }, { "epoch": 1.8460852107963417, "grad_norm": 0.18218432366847992, "learning_rate": 6.5416171401664565e-06, "loss": 0.4735, "step": 8276 }, { "epoch": 1.846308275708231, "grad_norm": 0.1764039546251297, "learning_rate": 6.539409027139443e-06, "loss": 0.4525, "step": 8277 }, { "epoch": 1.8465313406201205, "grad_norm": 0.1932554543018341, "learning_rate": 6.537201105781723e-06, "loss": 0.4911, "step": 8278 }, { "epoch": 1.84675440553201, "grad_norm": 0.1814078986644745, "learning_rate": 6.53499337621559e-06, "loss": 0.4461, "step": 8279 }, { "epoch": 1.8469774704438993, "grad_norm": 0.173579141497612, "learning_rate": 6.5327858385633184e-06, "loss": 0.4262, "step": 8280 }, { "epoch": 1.8472005353557885, "grad_norm": 0.18026985228061676, "learning_rate": 6.530578492947177e-06, "loss": 0.4385, "step": 8281 }, { "epoch": 1.8474236002676778, "grad_norm": 0.18310683965682983, "learning_rate": 6.528371339489428e-06, "loss": 0.4574, "step": 8282 }, { "epoch": 1.847646665179567, "grad_norm": 0.18035286664962769, "learning_rate": 6.526164378312305e-06, "loss": 0.4579, "step": 8283 }, { "epoch": 1.8478697300914566, "grad_norm": 0.20232632756233215, "learning_rate": 6.523957609538049e-06, "loss": 0.4492, "step": 8284 }, { "epoch": 1.8480927950033461, "grad_norm": 0.180943563580513, "learning_rate": 6.5217510332888904e-06, "loss": 0.4339, "step": 8285 }, { "epoch": 1.8483158599152354, "grad_norm": 0.17553845047950745, "learning_rate": 6.519544649687034e-06, "loss": 0.4313, "step": 8286 }, { "epoch": 1.8485389248271247, "grad_norm": 0.17904485762119293, "learning_rate": 6.517338458854692e-06, "loss": 0.4484, "step": 8287 }, { "epoch": 1.848761989739014, "grad_norm": 0.17891845107078552, "learning_rate": 6.515132460914049e-06, "loss": 0.4655, "step": 8288 }, { "epoch": 1.8489850546509035, "grad_norm": 0.19273404777050018, "learning_rate": 6.512926655987295e-06, "loss": 0.4718, "step": 8289 }, { "epoch": 1.8492081195627927, "grad_norm": 0.21862982213497162, "learning_rate": 6.510721044196593e-06, "loss": 0.4678, "step": 8290 }, { "epoch": 1.8494311844746822, "grad_norm": 0.17714935541152954, "learning_rate": 6.508515625664111e-06, "loss": 0.4626, "step": 8291 }, { "epoch": 1.8496542493865715, "grad_norm": 0.1834956407546997, "learning_rate": 6.5063104005119945e-06, "loss": 0.4463, "step": 8292 }, { "epoch": 1.8498773142984608, "grad_norm": 0.17443230748176575, "learning_rate": 6.504105368862386e-06, "loss": 0.4563, "step": 8293 }, { "epoch": 1.85010037921035, "grad_norm": 0.17853355407714844, "learning_rate": 6.50190053083741e-06, "loss": 0.4588, "step": 8294 }, { "epoch": 1.8503234441222396, "grad_norm": 0.17415203154087067, "learning_rate": 6.4996958865591895e-06, "loss": 0.4303, "step": 8295 }, { "epoch": 1.850546509034129, "grad_norm": 0.169075608253479, "learning_rate": 6.497491436149826e-06, "loss": 0.4292, "step": 8296 }, { "epoch": 1.8507695739460184, "grad_norm": 0.1862117052078247, "learning_rate": 6.49528717973142e-06, "loss": 0.4308, "step": 8297 }, { "epoch": 1.8509926388579077, "grad_norm": 0.1834726333618164, "learning_rate": 6.4930831174260535e-06, "loss": 0.4493, "step": 8298 }, { "epoch": 1.851215703769797, "grad_norm": 0.18994340300559998, "learning_rate": 6.490879249355806e-06, "loss": 0.4688, "step": 8299 }, { "epoch": 1.8514387686816862, "grad_norm": 0.18322892487049103, "learning_rate": 6.4886755756427365e-06, "loss": 0.4473, "step": 8300 }, { "epoch": 1.8516618335935757, "grad_norm": 0.1817985475063324, "learning_rate": 6.486472096408903e-06, "loss": 0.4633, "step": 8301 }, { "epoch": 1.8518848985054652, "grad_norm": 0.17432516813278198, "learning_rate": 6.484268811776343e-06, "loss": 0.4574, "step": 8302 }, { "epoch": 1.8521079634173545, "grad_norm": 0.17498242855072021, "learning_rate": 6.482065721867095e-06, "loss": 0.4379, "step": 8303 }, { "epoch": 1.8523310283292438, "grad_norm": 0.18785923719406128, "learning_rate": 6.479862826803172e-06, "loss": 0.4572, "step": 8304 }, { "epoch": 1.852554093241133, "grad_norm": 0.1780933290719986, "learning_rate": 6.477660126706588e-06, "loss": 0.4161, "step": 8305 }, { "epoch": 1.8527771581530226, "grad_norm": 0.17613643407821655, "learning_rate": 6.4754576216993455e-06, "loss": 0.4323, "step": 8306 }, { "epoch": 1.8530002230649119, "grad_norm": 0.17086435854434967, "learning_rate": 6.473255311903428e-06, "loss": 0.4497, "step": 8307 }, { "epoch": 1.8532232879768014, "grad_norm": 0.18054986000061035, "learning_rate": 6.471053197440817e-06, "loss": 0.4697, "step": 8308 }, { "epoch": 1.8534463528886906, "grad_norm": 0.17696300148963928, "learning_rate": 6.468851278433476e-06, "loss": 0.4704, "step": 8309 }, { "epoch": 1.85366941780058, "grad_norm": 0.18155822157859802, "learning_rate": 6.466649555003366e-06, "loss": 0.4576, "step": 8310 }, { "epoch": 1.8538924827124692, "grad_norm": 0.18807131052017212, "learning_rate": 6.464448027272427e-06, "loss": 0.4651, "step": 8311 }, { "epoch": 1.8541155476243587, "grad_norm": 0.1846548169851303, "learning_rate": 6.462246695362597e-06, "loss": 0.4697, "step": 8312 }, { "epoch": 1.8543386125362482, "grad_norm": 0.18467946350574493, "learning_rate": 6.460045559395797e-06, "loss": 0.4474, "step": 8313 }, { "epoch": 1.8545616774481375, "grad_norm": 0.18795856833457947, "learning_rate": 6.457844619493943e-06, "loss": 0.461, "step": 8314 }, { "epoch": 1.8547847423600268, "grad_norm": 0.1844158172607422, "learning_rate": 6.455643875778932e-06, "loss": 0.4456, "step": 8315 }, { "epoch": 1.855007807271916, "grad_norm": 0.19735929369926453, "learning_rate": 6.453443328372662e-06, "loss": 0.4785, "step": 8316 }, { "epoch": 1.8552308721838053, "grad_norm": 0.187907874584198, "learning_rate": 6.451242977397006e-06, "loss": 0.4334, "step": 8317 }, { "epoch": 1.8554539370956948, "grad_norm": 0.18026615679264069, "learning_rate": 6.449042822973839e-06, "loss": 0.4454, "step": 8318 }, { "epoch": 1.8556770020075843, "grad_norm": 0.17745928466320038, "learning_rate": 6.446842865225014e-06, "loss": 0.4621, "step": 8319 }, { "epoch": 1.8559000669194736, "grad_norm": 0.17665286362171173, "learning_rate": 6.4446431042723835e-06, "loss": 0.4459, "step": 8320 }, { "epoch": 1.856123131831363, "grad_norm": 0.18266570568084717, "learning_rate": 6.44244354023778e-06, "loss": 0.466, "step": 8321 }, { "epoch": 1.8563461967432522, "grad_norm": 0.27143698930740356, "learning_rate": 6.440244173243033e-06, "loss": 0.4728, "step": 8322 }, { "epoch": 1.8565692616551417, "grad_norm": 0.18305523693561554, "learning_rate": 6.438045003409954e-06, "loss": 0.4272, "step": 8323 }, { "epoch": 1.856792326567031, "grad_norm": 0.17935436964035034, "learning_rate": 6.435846030860349e-06, "loss": 0.4616, "step": 8324 }, { "epoch": 1.8570153914789205, "grad_norm": 0.17304593324661255, "learning_rate": 6.433647255716011e-06, "loss": 0.4485, "step": 8325 }, { "epoch": 1.8572384563908098, "grad_norm": 0.1776076853275299, "learning_rate": 6.43144867809872e-06, "loss": 0.4431, "step": 8326 }, { "epoch": 1.857461521302699, "grad_norm": 0.18469847738742828, "learning_rate": 6.4292502981302494e-06, "loss": 0.4633, "step": 8327 }, { "epoch": 1.8576845862145883, "grad_norm": 0.18084385991096497, "learning_rate": 6.427052115932357e-06, "loss": 0.4558, "step": 8328 }, { "epoch": 1.8579076511264778, "grad_norm": 0.1821753978729248, "learning_rate": 6.424854131626796e-06, "loss": 0.4565, "step": 8329 }, { "epoch": 1.8581307160383673, "grad_norm": 0.17873086035251617, "learning_rate": 6.422656345335299e-06, "loss": 0.4521, "step": 8330 }, { "epoch": 1.8583537809502566, "grad_norm": 0.2390872985124588, "learning_rate": 6.420458757179597e-06, "loss": 0.4443, "step": 8331 }, { "epoch": 1.8585768458621459, "grad_norm": 0.1892087161540985, "learning_rate": 6.418261367281404e-06, "loss": 0.4496, "step": 8332 }, { "epoch": 1.8587999107740352, "grad_norm": 0.1815430074930191, "learning_rate": 6.416064175762428e-06, "loss": 0.4725, "step": 8333 }, { "epoch": 1.8590229756859245, "grad_norm": 0.17400890588760376, "learning_rate": 6.41386718274436e-06, "loss": 0.4264, "step": 8334 }, { "epoch": 1.859246040597814, "grad_norm": 0.17337702214717865, "learning_rate": 6.4116703883488875e-06, "loss": 0.4363, "step": 8335 }, { "epoch": 1.8594691055097035, "grad_norm": 0.1814270168542862, "learning_rate": 6.409473792697678e-06, "loss": 0.4413, "step": 8336 }, { "epoch": 1.8596921704215927, "grad_norm": 0.17769312858581543, "learning_rate": 6.407277395912397e-06, "loss": 0.4651, "step": 8337 }, { "epoch": 1.859915235333482, "grad_norm": 0.18968099355697632, "learning_rate": 6.405081198114692e-06, "loss": 0.4984, "step": 8338 }, { "epoch": 1.8601383002453713, "grad_norm": 0.17772215604782104, "learning_rate": 6.4028851994262045e-06, "loss": 0.4267, "step": 8339 }, { "epoch": 1.8603613651572608, "grad_norm": 0.18239617347717285, "learning_rate": 6.400689399968559e-06, "loss": 0.47, "step": 8340 }, { "epoch": 1.86058443006915, "grad_norm": 0.1779201775789261, "learning_rate": 6.398493799863378e-06, "loss": 0.4684, "step": 8341 }, { "epoch": 1.8608074949810396, "grad_norm": 0.18368324637413025, "learning_rate": 6.396298399232261e-06, "loss": 0.4599, "step": 8342 }, { "epoch": 1.8610305598929289, "grad_norm": 0.1744937300682068, "learning_rate": 6.39410319819681e-06, "loss": 0.4278, "step": 8343 }, { "epoch": 1.8612536248048182, "grad_norm": 0.17699581384658813, "learning_rate": 6.391908196878605e-06, "loss": 0.4393, "step": 8344 }, { "epoch": 1.8614766897167074, "grad_norm": 0.17797012627124786, "learning_rate": 6.389713395399219e-06, "loss": 0.4653, "step": 8345 }, { "epoch": 1.861699754628597, "grad_norm": 0.17617610096931458, "learning_rate": 6.3875187938802164e-06, "loss": 0.4577, "step": 8346 }, { "epoch": 1.8619228195404864, "grad_norm": 0.17390653491020203, "learning_rate": 6.385324392443144e-06, "loss": 0.4215, "step": 8347 }, { "epoch": 1.8621458844523757, "grad_norm": 0.1813625991344452, "learning_rate": 6.3831301912095465e-06, "loss": 0.429, "step": 8348 }, { "epoch": 1.862368949364265, "grad_norm": 0.17084379494190216, "learning_rate": 6.380936190300948e-06, "loss": 0.4402, "step": 8349 }, { "epoch": 1.8625920142761543, "grad_norm": 0.18061068654060364, "learning_rate": 6.37874238983887e-06, "loss": 0.4657, "step": 8350 }, { "epoch": 1.8628150791880436, "grad_norm": 0.18320946395397186, "learning_rate": 6.376548789944817e-06, "loss": 0.4526, "step": 8351 }, { "epoch": 1.863038144099933, "grad_norm": 0.17092865705490112, "learning_rate": 6.374355390740284e-06, "loss": 0.4259, "step": 8352 }, { "epoch": 1.8632612090118226, "grad_norm": 0.17330855131149292, "learning_rate": 6.372162192346754e-06, "loss": 0.4313, "step": 8353 }, { "epoch": 1.8634842739237119, "grad_norm": 0.17183774709701538, "learning_rate": 6.369969194885703e-06, "loss": 0.4601, "step": 8354 }, { "epoch": 1.8637073388356011, "grad_norm": 0.19768626987934113, "learning_rate": 6.367776398478591e-06, "loss": 0.491, "step": 8355 }, { "epoch": 1.8639304037474904, "grad_norm": 0.1920434832572937, "learning_rate": 6.365583803246872e-06, "loss": 0.444, "step": 8356 }, { "epoch": 1.86415346865938, "grad_norm": 0.18218182027339935, "learning_rate": 6.3633914093119804e-06, "loss": 0.5027, "step": 8357 }, { "epoch": 1.8643765335712692, "grad_norm": 0.18595336377620697, "learning_rate": 6.36119921679535e-06, "loss": 0.4463, "step": 8358 }, { "epoch": 1.8645995984831587, "grad_norm": 0.19364967942237854, "learning_rate": 6.359007225818395e-06, "loss": 0.4637, "step": 8359 }, { "epoch": 1.864822663395048, "grad_norm": 0.18406571447849274, "learning_rate": 6.356815436502524e-06, "loss": 0.4745, "step": 8360 }, { "epoch": 1.8650457283069373, "grad_norm": 0.18429051339626312, "learning_rate": 6.354623848969128e-06, "loss": 0.4379, "step": 8361 }, { "epoch": 1.8652687932188265, "grad_norm": 0.18177488446235657, "learning_rate": 6.352432463339596e-06, "loss": 0.4565, "step": 8362 }, { "epoch": 1.865491858130716, "grad_norm": 0.1876797080039978, "learning_rate": 6.350241279735301e-06, "loss": 0.4532, "step": 8363 }, { "epoch": 1.8657149230426056, "grad_norm": 0.18419890105724335, "learning_rate": 6.3480502982775974e-06, "loss": 0.4485, "step": 8364 }, { "epoch": 1.8659379879544948, "grad_norm": 0.1855155974626541, "learning_rate": 6.3458595190878414e-06, "loss": 0.455, "step": 8365 }, { "epoch": 1.8661610528663841, "grad_norm": 0.18031683564186096, "learning_rate": 6.343668942287369e-06, "loss": 0.4502, "step": 8366 }, { "epoch": 1.8663841177782734, "grad_norm": 0.18513405323028564, "learning_rate": 6.341478567997512e-06, "loss": 0.458, "step": 8367 }, { "epoch": 1.8666071826901627, "grad_norm": 0.1804891675710678, "learning_rate": 6.3392883963395826e-06, "loss": 0.4391, "step": 8368 }, { "epoch": 1.8668302476020522, "grad_norm": 0.17720703780651093, "learning_rate": 6.337098427434889e-06, "loss": 0.4451, "step": 8369 }, { "epoch": 1.8670533125139417, "grad_norm": 0.17895428836345673, "learning_rate": 6.334908661404726e-06, "loss": 0.4603, "step": 8370 }, { "epoch": 1.867276377425831, "grad_norm": 0.1729314774274826, "learning_rate": 6.3327190983703745e-06, "loss": 0.4544, "step": 8371 }, { "epoch": 1.8674994423377203, "grad_norm": 0.17181377112865448, "learning_rate": 6.330529738453109e-06, "loss": 0.4406, "step": 8372 }, { "epoch": 1.8677225072496095, "grad_norm": 0.17984220385551453, "learning_rate": 6.328340581774187e-06, "loss": 0.4796, "step": 8373 }, { "epoch": 1.867945572161499, "grad_norm": 0.1723097413778305, "learning_rate": 6.326151628454862e-06, "loss": 0.4341, "step": 8374 }, { "epoch": 1.8681686370733883, "grad_norm": 0.17812368273735046, "learning_rate": 6.323962878616366e-06, "loss": 0.4558, "step": 8375 }, { "epoch": 1.8683917019852778, "grad_norm": 0.2598254680633545, "learning_rate": 6.3217743323799305e-06, "loss": 0.4445, "step": 8376 }, { "epoch": 1.868614766897167, "grad_norm": 0.17721040546894073, "learning_rate": 6.3195859898667675e-06, "loss": 0.4308, "step": 8377 }, { "epoch": 1.8688378318090564, "grad_norm": 0.1813073456287384, "learning_rate": 6.317397851198087e-06, "loss": 0.4672, "step": 8378 }, { "epoch": 1.8690608967209457, "grad_norm": 0.17863677442073822, "learning_rate": 6.315209916495076e-06, "loss": 0.4606, "step": 8379 }, { "epoch": 1.8692839616328352, "grad_norm": 0.1793542504310608, "learning_rate": 6.313022185878918e-06, "loss": 0.4879, "step": 8380 }, { "epoch": 1.8695070265447247, "grad_norm": 0.18399956822395325, "learning_rate": 6.310834659470781e-06, "loss": 0.4591, "step": 8381 }, { "epoch": 1.869730091456614, "grad_norm": 0.18076615035533905, "learning_rate": 6.308647337391831e-06, "loss": 0.4385, "step": 8382 }, { "epoch": 1.8699531563685032, "grad_norm": 0.1910838931798935, "learning_rate": 6.306460219763207e-06, "loss": 0.4298, "step": 8383 }, { "epoch": 1.8701762212803925, "grad_norm": 0.191563680768013, "learning_rate": 6.304273306706052e-06, "loss": 0.4685, "step": 8384 }, { "epoch": 1.8703992861922818, "grad_norm": 0.1867865025997162, "learning_rate": 6.302086598341486e-06, "loss": 0.4638, "step": 8385 }, { "epoch": 1.8706223511041713, "grad_norm": 0.18163664638996124, "learning_rate": 6.2999000947906275e-06, "loss": 0.4394, "step": 8386 }, { "epoch": 1.8708454160160608, "grad_norm": 0.1798231601715088, "learning_rate": 6.297713796174574e-06, "loss": 0.4885, "step": 8387 }, { "epoch": 1.87106848092795, "grad_norm": 0.19126291573047638, "learning_rate": 6.295527702614422e-06, "loss": 0.46, "step": 8388 }, { "epoch": 1.8712915458398394, "grad_norm": 0.17811056971549988, "learning_rate": 6.293341814231244e-06, "loss": 0.4518, "step": 8389 }, { "epoch": 1.8715146107517286, "grad_norm": 0.1750759333372116, "learning_rate": 6.2911561311461126e-06, "loss": 0.445, "step": 8390 }, { "epoch": 1.8717376756636182, "grad_norm": 0.17805348336696625, "learning_rate": 6.288970653480087e-06, "loss": 0.4423, "step": 8391 }, { "epoch": 1.8719607405755074, "grad_norm": 0.18071311712265015, "learning_rate": 6.286785381354205e-06, "loss": 0.4472, "step": 8392 }, { "epoch": 1.872183805487397, "grad_norm": 0.1815258264541626, "learning_rate": 6.284600314889511e-06, "loss": 0.4749, "step": 8393 }, { "epoch": 1.8724068703992862, "grad_norm": 0.1812569499015808, "learning_rate": 6.28241545420702e-06, "loss": 0.4406, "step": 8394 }, { "epoch": 1.8726299353111755, "grad_norm": 0.1827443242073059, "learning_rate": 6.280230799427747e-06, "loss": 0.4698, "step": 8395 }, { "epoch": 1.8728530002230648, "grad_norm": 0.1790318340063095, "learning_rate": 6.278046350672689e-06, "loss": 0.4462, "step": 8396 }, { "epoch": 1.8730760651349543, "grad_norm": 0.18236975371837616, "learning_rate": 6.275862108062837e-06, "loss": 0.4655, "step": 8397 }, { "epoch": 1.8732991300468438, "grad_norm": 0.2039221227169037, "learning_rate": 6.273678071719166e-06, "loss": 0.4689, "step": 8398 }, { "epoch": 1.873522194958733, "grad_norm": 0.181179016828537, "learning_rate": 6.2714942417626445e-06, "loss": 0.436, "step": 8399 }, { "epoch": 1.8737452598706223, "grad_norm": 0.1835191249847412, "learning_rate": 6.269310618314223e-06, "loss": 0.4576, "step": 8400 }, { "epoch": 1.8739683247825116, "grad_norm": 0.19027259945869446, "learning_rate": 6.267127201494849e-06, "loss": 0.4814, "step": 8401 }, { "epoch": 1.874191389694401, "grad_norm": 0.17249448597431183, "learning_rate": 6.264943991425449e-06, "loss": 0.4466, "step": 8402 }, { "epoch": 1.8744144546062904, "grad_norm": 0.18657323718070984, "learning_rate": 6.262760988226948e-06, "loss": 0.4782, "step": 8403 }, { "epoch": 1.87463751951818, "grad_norm": 0.1854049414396286, "learning_rate": 6.260578192020249e-06, "loss": 0.4693, "step": 8404 }, { "epoch": 1.8748605844300692, "grad_norm": 0.1833823025226593, "learning_rate": 6.2583956029262526e-06, "loss": 0.4455, "step": 8405 }, { "epoch": 1.8750836493419585, "grad_norm": 0.2055628001689911, "learning_rate": 6.256213221065843e-06, "loss": 0.4378, "step": 8406 }, { "epoch": 1.8753067142538478, "grad_norm": 0.21103620529174805, "learning_rate": 6.254031046559896e-06, "loss": 0.4435, "step": 8407 }, { "epoch": 1.8755297791657373, "grad_norm": 0.17623326182365417, "learning_rate": 6.251849079529271e-06, "loss": 0.4526, "step": 8408 }, { "epoch": 1.8757528440776265, "grad_norm": 0.1766071766614914, "learning_rate": 6.2496673200948225e-06, "loss": 0.4459, "step": 8409 }, { "epoch": 1.875975908989516, "grad_norm": 0.175866037607193, "learning_rate": 6.247485768377386e-06, "loss": 0.456, "step": 8410 }, { "epoch": 1.8761989739014053, "grad_norm": 0.17706961929798126, "learning_rate": 6.245304424497792e-06, "loss": 0.4631, "step": 8411 }, { "epoch": 1.8764220388132946, "grad_norm": 0.17484982311725616, "learning_rate": 6.24312328857686e-06, "loss": 0.4689, "step": 8412 }, { "epoch": 1.876645103725184, "grad_norm": 0.18998976051807404, "learning_rate": 6.240942360735388e-06, "loss": 0.4627, "step": 8413 }, { "epoch": 1.8768681686370734, "grad_norm": 0.1831061840057373, "learning_rate": 6.238761641094178e-06, "loss": 0.4503, "step": 8414 }, { "epoch": 1.877091233548963, "grad_norm": 0.1826712042093277, "learning_rate": 6.236581129774003e-06, "loss": 0.4729, "step": 8415 }, { "epoch": 1.8773142984608522, "grad_norm": 0.1810642033815384, "learning_rate": 6.234400826895641e-06, "loss": 0.4462, "step": 8416 }, { "epoch": 1.8775373633727415, "grad_norm": 0.16975516080856323, "learning_rate": 6.232220732579845e-06, "loss": 0.4159, "step": 8417 }, { "epoch": 1.8777604282846307, "grad_norm": 0.1825772076845169, "learning_rate": 6.230040846947368e-06, "loss": 0.47, "step": 8418 }, { "epoch": 1.87798349319652, "grad_norm": 0.18529464304447174, "learning_rate": 6.227861170118941e-06, "loss": 0.4473, "step": 8419 }, { "epoch": 1.8782065581084095, "grad_norm": 0.17210659384727478, "learning_rate": 6.2256817022152915e-06, "loss": 0.4412, "step": 8420 }, { "epoch": 1.878429623020299, "grad_norm": 0.19293324649333954, "learning_rate": 6.223502443357129e-06, "loss": 0.4592, "step": 8421 }, { "epoch": 1.8786526879321883, "grad_norm": 0.16933956742286682, "learning_rate": 6.2213233936651586e-06, "loss": 0.433, "step": 8422 }, { "epoch": 1.8788757528440776, "grad_norm": 0.1740327775478363, "learning_rate": 6.219144553260065e-06, "loss": 0.4452, "step": 8423 }, { "epoch": 1.8790988177559669, "grad_norm": 0.18426866829395294, "learning_rate": 6.21696592226253e-06, "loss": 0.4658, "step": 8424 }, { "epoch": 1.8793218826678564, "grad_norm": 0.2005191296339035, "learning_rate": 6.214787500793218e-06, "loss": 0.4762, "step": 8425 }, { "epoch": 1.8795449475797457, "grad_norm": 0.18926943838596344, "learning_rate": 6.212609288972785e-06, "loss": 0.4584, "step": 8426 }, { "epoch": 1.8797680124916352, "grad_norm": 0.18263809382915497, "learning_rate": 6.210431286921872e-06, "loss": 0.4344, "step": 8427 }, { "epoch": 1.8799910774035244, "grad_norm": 0.18587824702262878, "learning_rate": 6.208253494761113e-06, "loss": 0.457, "step": 8428 }, { "epoch": 1.8802141423154137, "grad_norm": 0.1803131103515625, "learning_rate": 6.2060759126111246e-06, "loss": 0.4588, "step": 8429 }, { "epoch": 1.880437207227303, "grad_norm": 0.17640437185764313, "learning_rate": 6.203898540592517e-06, "loss": 0.4593, "step": 8430 }, { "epoch": 1.8806602721391925, "grad_norm": 0.17931349575519562, "learning_rate": 6.201721378825889e-06, "loss": 0.4371, "step": 8431 }, { "epoch": 1.880883337051082, "grad_norm": 0.19341467320919037, "learning_rate": 6.199544427431821e-06, "loss": 0.4422, "step": 8432 }, { "epoch": 1.8811064019629713, "grad_norm": 0.17641760408878326, "learning_rate": 6.19736768653089e-06, "loss": 0.4522, "step": 8433 }, { "epoch": 1.8813294668748606, "grad_norm": 0.18412084877490997, "learning_rate": 6.195191156243654e-06, "loss": 0.4819, "step": 8434 }, { "epoch": 1.8815525317867499, "grad_norm": 0.18774092197418213, "learning_rate": 6.193014836690667e-06, "loss": 0.4648, "step": 8435 }, { "epoch": 1.8817755966986391, "grad_norm": 0.1818709373474121, "learning_rate": 6.190838727992463e-06, "loss": 0.4621, "step": 8436 }, { "epoch": 1.8819986616105286, "grad_norm": 0.1864043027162552, "learning_rate": 6.188662830269574e-06, "loss": 0.4569, "step": 8437 }, { "epoch": 1.8822217265224181, "grad_norm": 0.18635167181491852, "learning_rate": 6.186487143642508e-06, "loss": 0.4451, "step": 8438 }, { "epoch": 1.8824447914343074, "grad_norm": 0.18478180468082428, "learning_rate": 6.184311668231775e-06, "loss": 0.4505, "step": 8439 }, { "epoch": 1.8826678563461967, "grad_norm": 0.1775161474943161, "learning_rate": 6.1821364041578615e-06, "loss": 0.4432, "step": 8440 }, { "epoch": 1.882890921258086, "grad_norm": 0.18662866950035095, "learning_rate": 6.179961351541252e-06, "loss": 0.4602, "step": 8441 }, { "epoch": 1.8831139861699755, "grad_norm": 0.1838211715221405, "learning_rate": 6.177786510502408e-06, "loss": 0.4519, "step": 8442 }, { "epoch": 1.8833370510818648, "grad_norm": 0.1826631873846054, "learning_rate": 6.1756118811617935e-06, "loss": 0.4505, "step": 8443 }, { "epoch": 1.8835601159937543, "grad_norm": 0.18082208931446075, "learning_rate": 6.173437463639846e-06, "loss": 0.4717, "step": 8444 }, { "epoch": 1.8837831809056436, "grad_norm": 0.18472546339035034, "learning_rate": 6.171263258057008e-06, "loss": 0.4626, "step": 8445 }, { "epoch": 1.8840062458175328, "grad_norm": 0.1825518012046814, "learning_rate": 6.16908926453369e-06, "loss": 0.4531, "step": 8446 }, { "epoch": 1.8842293107294221, "grad_norm": 0.23177875578403473, "learning_rate": 6.166915483190304e-06, "loss": 0.4379, "step": 8447 }, { "epoch": 1.8844523756413116, "grad_norm": 0.17296954989433289, "learning_rate": 6.164741914147252e-06, "loss": 0.4359, "step": 8448 }, { "epoch": 1.8846754405532011, "grad_norm": 0.18288472294807434, "learning_rate": 6.162568557524918e-06, "loss": 0.4547, "step": 8449 }, { "epoch": 1.8848985054650904, "grad_norm": 0.17033147811889648, "learning_rate": 6.160395413443674e-06, "loss": 0.4164, "step": 8450 }, { "epoch": 1.8851215703769797, "grad_norm": 0.1902470588684082, "learning_rate": 6.1582224820238836e-06, "loss": 0.4366, "step": 8451 }, { "epoch": 1.885344635288869, "grad_norm": 0.17565543949604034, "learning_rate": 6.156049763385901e-06, "loss": 0.4693, "step": 8452 }, { "epoch": 1.8855677002007583, "grad_norm": 0.1908363252878189, "learning_rate": 6.1538772576500584e-06, "loss": 0.4484, "step": 8453 }, { "epoch": 1.8857907651126478, "grad_norm": 0.1861358880996704, "learning_rate": 6.151704964936687e-06, "loss": 0.4531, "step": 8454 }, { "epoch": 1.8860138300245373, "grad_norm": 0.18762946128845215, "learning_rate": 6.149532885366103e-06, "loss": 0.4676, "step": 8455 }, { "epoch": 1.8862368949364265, "grad_norm": 0.19913704693317413, "learning_rate": 6.147361019058606e-06, "loss": 0.4429, "step": 8456 }, { "epoch": 1.8864599598483158, "grad_norm": 0.19710618257522583, "learning_rate": 6.145189366134492e-06, "loss": 0.4722, "step": 8457 }, { "epoch": 1.886683024760205, "grad_norm": 0.1801493912935257, "learning_rate": 6.143017926714036e-06, "loss": 0.4598, "step": 8458 }, { "epoch": 1.8869060896720946, "grad_norm": 0.2561694383621216, "learning_rate": 6.140846700917513e-06, "loss": 0.4218, "step": 8459 }, { "epoch": 1.887129154583984, "grad_norm": 0.17985545098781586, "learning_rate": 6.138675688865171e-06, "loss": 0.4269, "step": 8460 }, { "epoch": 1.8873522194958734, "grad_norm": 0.1875736266374588, "learning_rate": 6.13650489067726e-06, "loss": 0.4695, "step": 8461 }, { "epoch": 1.8875752844077627, "grad_norm": 0.18572880327701569, "learning_rate": 6.134334306474009e-06, "loss": 0.4448, "step": 8462 }, { "epoch": 1.887798349319652, "grad_norm": 0.17786027491092682, "learning_rate": 6.132163936375641e-06, "loss": 0.4505, "step": 8463 }, { "epoch": 1.8880214142315412, "grad_norm": 0.18378950655460358, "learning_rate": 6.129993780502364e-06, "loss": 0.4583, "step": 8464 }, { "epoch": 1.8882444791434307, "grad_norm": 0.1798183172941208, "learning_rate": 6.127823838974375e-06, "loss": 0.4491, "step": 8465 }, { "epoch": 1.8884675440553202, "grad_norm": 0.18183192610740662, "learning_rate": 6.125654111911857e-06, "loss": 0.4707, "step": 8466 }, { "epoch": 1.8886906089672095, "grad_norm": 0.17841511964797974, "learning_rate": 6.1234845994349875e-06, "loss": 0.4645, "step": 8467 }, { "epoch": 1.8889136738790988, "grad_norm": 0.17050506174564362, "learning_rate": 6.121315301663923e-06, "loss": 0.4241, "step": 8468 }, { "epoch": 1.889136738790988, "grad_norm": 0.18663549423217773, "learning_rate": 6.119146218718817e-06, "loss": 0.4736, "step": 8469 }, { "epoch": 1.8893598037028774, "grad_norm": 0.19147509336471558, "learning_rate": 6.116977350719801e-06, "loss": 0.4557, "step": 8470 }, { "epoch": 1.8895828686147669, "grad_norm": 0.18618586659431458, "learning_rate": 6.114808697787009e-06, "loss": 0.4469, "step": 8471 }, { "epoch": 1.8898059335266564, "grad_norm": 0.18382149934768677, "learning_rate": 6.112640260040548e-06, "loss": 0.4903, "step": 8472 }, { "epoch": 1.8900289984385457, "grad_norm": 0.20091551542282104, "learning_rate": 6.110472037600523e-06, "loss": 0.4578, "step": 8473 }, { "epoch": 1.890252063350435, "grad_norm": 0.1820792704820633, "learning_rate": 6.10830403058702e-06, "loss": 0.4645, "step": 8474 }, { "epoch": 1.8904751282623242, "grad_norm": 0.2219545990228653, "learning_rate": 6.106136239120121e-06, "loss": 0.4705, "step": 8475 }, { "epoch": 1.8906981931742137, "grad_norm": 0.17899756133556366, "learning_rate": 6.103968663319893e-06, "loss": 0.4646, "step": 8476 }, { "epoch": 1.890921258086103, "grad_norm": 0.17997188866138458, "learning_rate": 6.101801303306383e-06, "loss": 0.4517, "step": 8477 }, { "epoch": 1.8911443229979925, "grad_norm": 0.16982750594615936, "learning_rate": 6.099634159199641e-06, "loss": 0.4286, "step": 8478 }, { "epoch": 1.8913673879098818, "grad_norm": 0.17612092196941376, "learning_rate": 6.0974672311196916e-06, "loss": 0.4476, "step": 8479 }, { "epoch": 1.891590452821771, "grad_norm": 0.18440189957618713, "learning_rate": 6.095300519186555e-06, "loss": 0.4456, "step": 8480 }, { "epoch": 1.8918135177336604, "grad_norm": 0.16765691339969635, "learning_rate": 6.093134023520237e-06, "loss": 0.4215, "step": 8481 }, { "epoch": 1.8920365826455499, "grad_norm": 0.1789095401763916, "learning_rate": 6.090967744240734e-06, "loss": 0.4711, "step": 8482 }, { "epoch": 1.8922596475574394, "grad_norm": 0.18848133087158203, "learning_rate": 6.088801681468024e-06, "loss": 0.4513, "step": 8483 }, { "epoch": 1.8924827124693286, "grad_norm": 0.17877109348773956, "learning_rate": 6.086635835322081e-06, "loss": 0.4506, "step": 8484 }, { "epoch": 1.892705777381218, "grad_norm": 0.18696287274360657, "learning_rate": 6.084470205922859e-06, "loss": 0.4523, "step": 8485 }, { "epoch": 1.8929288422931072, "grad_norm": 0.18571056425571442, "learning_rate": 6.08230479339031e-06, "loss": 0.4646, "step": 8486 }, { "epoch": 1.8931519072049967, "grad_norm": 0.17447581887245178, "learning_rate": 6.080139597844361e-06, "loss": 0.4671, "step": 8487 }, { "epoch": 1.893374972116886, "grad_norm": 0.18128472566604614, "learning_rate": 6.0779746194049415e-06, "loss": 0.4419, "step": 8488 }, { "epoch": 1.8935980370287755, "grad_norm": 0.1792159378528595, "learning_rate": 6.075809858191957e-06, "loss": 0.4547, "step": 8489 }, { "epoch": 1.8938211019406648, "grad_norm": 0.17733284831047058, "learning_rate": 6.073645314325306e-06, "loss": 0.4177, "step": 8490 }, { "epoch": 1.894044166852554, "grad_norm": 0.1729053109884262, "learning_rate": 6.071480987924875e-06, "loss": 0.4555, "step": 8491 }, { "epoch": 1.8942672317644433, "grad_norm": 0.18540537357330322, "learning_rate": 6.069316879110539e-06, "loss": 0.4352, "step": 8492 }, { "epoch": 1.8944902966763328, "grad_norm": 0.17858169972896576, "learning_rate": 6.0671529880021585e-06, "loss": 0.4707, "step": 8493 }, { "epoch": 1.8947133615882221, "grad_norm": 0.1803089827299118, "learning_rate": 6.0649893147195845e-06, "loss": 0.4438, "step": 8494 }, { "epoch": 1.8949364265001116, "grad_norm": 0.18085269629955292, "learning_rate": 6.062825859382655e-06, "loss": 0.4419, "step": 8495 }, { "epoch": 1.895159491412001, "grad_norm": 0.18501605093479156, "learning_rate": 6.060662622111193e-06, "loss": 0.4698, "step": 8496 }, { "epoch": 1.8953825563238902, "grad_norm": 0.20104503631591797, "learning_rate": 6.058499603025018e-06, "loss": 0.459, "step": 8497 }, { "epoch": 1.8956056212357795, "grad_norm": 0.17107710242271423, "learning_rate": 6.056336802243925e-06, "loss": 0.4357, "step": 8498 }, { "epoch": 1.895828686147669, "grad_norm": 0.18555793166160583, "learning_rate": 6.0541742198877085e-06, "loss": 0.4562, "step": 8499 }, { "epoch": 1.8960517510595585, "grad_norm": 0.17548789083957672, "learning_rate": 6.052011856076142e-06, "loss": 0.4281, "step": 8500 }, { "epoch": 1.8962748159714478, "grad_norm": 0.19359424710273743, "learning_rate": 6.049849710928995e-06, "loss": 0.4761, "step": 8501 }, { "epoch": 1.896497880883337, "grad_norm": 0.1868746131658554, "learning_rate": 6.0476877845660165e-06, "loss": 0.4797, "step": 8502 }, { "epoch": 1.8967209457952263, "grad_norm": 0.18077945709228516, "learning_rate": 6.045526077106951e-06, "loss": 0.4245, "step": 8503 }, { "epoch": 1.8969440107071158, "grad_norm": 0.1770927757024765, "learning_rate": 6.0433645886715255e-06, "loss": 0.4483, "step": 8504 }, { "epoch": 1.897167075619005, "grad_norm": 0.18114930391311646, "learning_rate": 6.041203319379457e-06, "loss": 0.4253, "step": 8505 }, { "epoch": 1.8973901405308946, "grad_norm": 0.17783895134925842, "learning_rate": 6.039042269350451e-06, "loss": 0.4396, "step": 8506 }, { "epoch": 1.897613205442784, "grad_norm": 0.18055394291877747, "learning_rate": 6.036881438704198e-06, "loss": 0.4561, "step": 8507 }, { "epoch": 1.8978362703546732, "grad_norm": 0.17930994927883148, "learning_rate": 6.034720827560381e-06, "loss": 0.4489, "step": 8508 }, { "epoch": 1.8980593352665625, "grad_norm": 0.18059539794921875, "learning_rate": 6.0325604360386684e-06, "loss": 0.4519, "step": 8509 }, { "epoch": 1.898282400178452, "grad_norm": 0.19161933660507202, "learning_rate": 6.0304002642587135e-06, "loss": 0.4771, "step": 8510 }, { "epoch": 1.8985054650903412, "grad_norm": 0.1823475956916809, "learning_rate": 6.028240312340163e-06, "loss": 0.4446, "step": 8511 }, { "epoch": 1.8987285300022307, "grad_norm": 0.1844417154788971, "learning_rate": 6.026080580402645e-06, "loss": 0.4526, "step": 8512 }, { "epoch": 1.89895159491412, "grad_norm": 0.1865631490945816, "learning_rate": 6.023921068565783e-06, "loss": 0.4767, "step": 8513 }, { "epoch": 1.8991746598260093, "grad_norm": 0.18571047484874725, "learning_rate": 6.021761776949182e-06, "loss": 0.4429, "step": 8514 }, { "epoch": 1.8993977247378986, "grad_norm": 0.18585099279880524, "learning_rate": 6.019602705672441e-06, "loss": 0.4608, "step": 8515 }, { "epoch": 1.899620789649788, "grad_norm": 0.17889703810214996, "learning_rate": 6.017443854855136e-06, "loss": 0.4121, "step": 8516 }, { "epoch": 1.8998438545616776, "grad_norm": 0.1884198635816574, "learning_rate": 6.015285224616843e-06, "loss": 0.4274, "step": 8517 }, { "epoch": 1.9000669194735669, "grad_norm": 0.18577080965042114, "learning_rate": 6.0131268150771194e-06, "loss": 0.4466, "step": 8518 }, { "epoch": 1.9002899843854562, "grad_norm": 0.1771336793899536, "learning_rate": 6.010968626355509e-06, "loss": 0.4567, "step": 8519 }, { "epoch": 1.9005130492973454, "grad_norm": 0.18902990221977234, "learning_rate": 6.008810658571551e-06, "loss": 0.4537, "step": 8520 }, { "epoch": 1.900736114209235, "grad_norm": 0.20060394704341888, "learning_rate": 6.006652911844763e-06, "loss": 0.4792, "step": 8521 }, { "epoch": 1.9009591791211242, "grad_norm": 0.1900641918182373, "learning_rate": 6.004495386294657e-06, "loss": 0.4775, "step": 8522 }, { "epoch": 1.9011822440330137, "grad_norm": 0.18330518901348114, "learning_rate": 6.002338082040727e-06, "loss": 0.4522, "step": 8523 }, { "epoch": 1.901405308944903, "grad_norm": 0.18432478606700897, "learning_rate": 6.000180999202463e-06, "loss": 0.4619, "step": 8524 }, { "epoch": 1.9016283738567923, "grad_norm": 0.17643876373767853, "learning_rate": 5.998024137899333e-06, "loss": 0.4453, "step": 8525 }, { "epoch": 1.9018514387686816, "grad_norm": 0.17010371387004852, "learning_rate": 5.995867498250804e-06, "loss": 0.4264, "step": 8526 }, { "epoch": 1.902074503680571, "grad_norm": 0.17619560658931732, "learning_rate": 5.993711080376317e-06, "loss": 0.4522, "step": 8527 }, { "epoch": 1.9022975685924604, "grad_norm": 0.18597012758255005, "learning_rate": 5.99155488439531e-06, "loss": 0.4814, "step": 8528 }, { "epoch": 1.9025206335043499, "grad_norm": 0.20063801109790802, "learning_rate": 5.989398910427209e-06, "loss": 0.4966, "step": 8529 }, { "epoch": 1.9027436984162391, "grad_norm": 0.17774170637130737, "learning_rate": 5.987243158591422e-06, "loss": 0.436, "step": 8530 }, { "epoch": 1.9029667633281284, "grad_norm": 0.1837313175201416, "learning_rate": 5.985087629007353e-06, "loss": 0.4648, "step": 8531 }, { "epoch": 1.9031898282400177, "grad_norm": 0.17951352894306183, "learning_rate": 5.982932321794383e-06, "loss": 0.4547, "step": 8532 }, { "epoch": 1.9034128931519072, "grad_norm": 0.1875593215227127, "learning_rate": 5.980777237071891e-06, "loss": 0.4525, "step": 8533 }, { "epoch": 1.9036359580637967, "grad_norm": 0.177789106965065, "learning_rate": 5.978622374959235e-06, "loss": 0.4369, "step": 8534 }, { "epoch": 1.903859022975686, "grad_norm": 0.1993599534034729, "learning_rate": 5.9764677355757705e-06, "loss": 0.4327, "step": 8535 }, { "epoch": 1.9040820878875753, "grad_norm": 0.18298867344856262, "learning_rate": 5.97431331904083e-06, "loss": 0.4487, "step": 8536 }, { "epoch": 1.9043051527994646, "grad_norm": 0.19131208956241608, "learning_rate": 5.972159125473742e-06, "loss": 0.4426, "step": 8537 }, { "epoch": 1.904528217711354, "grad_norm": 0.1916476935148239, "learning_rate": 5.970005154993816e-06, "loss": 0.4456, "step": 8538 }, { "epoch": 1.9047512826232433, "grad_norm": 0.18648818135261536, "learning_rate": 5.9678514077203554e-06, "loss": 0.4564, "step": 8539 }, { "epoch": 1.9049743475351328, "grad_norm": 0.18742208182811737, "learning_rate": 5.965697883772648e-06, "loss": 0.4665, "step": 8540 }, { "epoch": 1.9051974124470221, "grad_norm": 0.20118087530136108, "learning_rate": 5.963544583269965e-06, "loss": 0.5012, "step": 8541 }, { "epoch": 1.9054204773589114, "grad_norm": 0.181925430893898, "learning_rate": 5.9613915063315775e-06, "loss": 0.4185, "step": 8542 }, { "epoch": 1.9056435422708007, "grad_norm": 0.18299917876720428, "learning_rate": 5.9592386530767285e-06, "loss": 0.4633, "step": 8543 }, { "epoch": 1.9058666071826902, "grad_norm": 0.1903630495071411, "learning_rate": 5.957086023624663e-06, "loss": 0.452, "step": 8544 }, { "epoch": 1.9060896720945795, "grad_norm": 0.17144551873207092, "learning_rate": 5.954933618094603e-06, "loss": 0.446, "step": 8545 }, { "epoch": 1.906312737006469, "grad_norm": 0.1903725415468216, "learning_rate": 5.952781436605766e-06, "loss": 0.4643, "step": 8546 }, { "epoch": 1.9065358019183583, "grad_norm": 0.17307879030704498, "learning_rate": 5.9506294792773475e-06, "loss": 0.4358, "step": 8547 }, { "epoch": 1.9067588668302475, "grad_norm": 0.18062752485275269, "learning_rate": 5.9484777462285425e-06, "loss": 0.4517, "step": 8548 }, { "epoch": 1.9069819317421368, "grad_norm": 0.19149060547351837, "learning_rate": 5.946326237578524e-06, "loss": 0.4513, "step": 8549 }, { "epoch": 1.9072049966540263, "grad_norm": 0.18597029149532318, "learning_rate": 5.944174953446457e-06, "loss": 0.4324, "step": 8550 }, { "epoch": 1.9074280615659158, "grad_norm": 0.17686405777931213, "learning_rate": 5.942023893951494e-06, "loss": 0.4175, "step": 8551 }, { "epoch": 1.907651126477805, "grad_norm": 0.17996470630168915, "learning_rate": 5.939873059212775e-06, "loss": 0.4548, "step": 8552 }, { "epoch": 1.9078741913896944, "grad_norm": 0.18042857944965363, "learning_rate": 5.937722449349421e-06, "loss": 0.438, "step": 8553 }, { "epoch": 1.9080972563015837, "grad_norm": 0.17731131613254547, "learning_rate": 5.935572064480555e-06, "loss": 0.4301, "step": 8554 }, { "epoch": 1.9083203212134732, "grad_norm": 0.18161912262439728, "learning_rate": 5.933421904725271e-06, "loss": 0.4805, "step": 8555 }, { "epoch": 1.9085433861253625, "grad_norm": 0.17587031424045563, "learning_rate": 5.931271970202664e-06, "loss": 0.4361, "step": 8556 }, { "epoch": 1.908766451037252, "grad_norm": 0.1959368884563446, "learning_rate": 5.929122261031806e-06, "loss": 0.4652, "step": 8557 }, { "epoch": 1.9089895159491412, "grad_norm": 0.17692820727825165, "learning_rate": 5.926972777331767e-06, "loss": 0.4492, "step": 8558 }, { "epoch": 1.9092125808610305, "grad_norm": 0.17724066972732544, "learning_rate": 5.924823519221593e-06, "loss": 0.4458, "step": 8559 }, { "epoch": 1.9094356457729198, "grad_norm": 0.17765964567661285, "learning_rate": 5.922674486820327e-06, "loss": 0.4259, "step": 8560 }, { "epoch": 1.9096587106848093, "grad_norm": 0.18779602646827698, "learning_rate": 5.920525680246997e-06, "loss": 0.4595, "step": 8561 }, { "epoch": 1.9098817755966988, "grad_norm": 0.17274630069732666, "learning_rate": 5.918377099620613e-06, "loss": 0.4408, "step": 8562 }, { "epoch": 1.910104840508588, "grad_norm": 0.1717062145471573, "learning_rate": 5.916228745060184e-06, "loss": 0.4308, "step": 8563 }, { "epoch": 1.9103279054204774, "grad_norm": 0.18208001554012299, "learning_rate": 5.914080616684691e-06, "loss": 0.4582, "step": 8564 }, { "epoch": 1.9105509703323666, "grad_norm": 0.18288151919841766, "learning_rate": 5.911932714613118e-06, "loss": 0.4404, "step": 8565 }, { "epoch": 1.910774035244256, "grad_norm": 0.1811714917421341, "learning_rate": 5.909785038964424e-06, "loss": 0.4544, "step": 8566 }, { "epoch": 1.9109971001561454, "grad_norm": 0.2010553777217865, "learning_rate": 5.907637589857565e-06, "loss": 0.4497, "step": 8567 }, { "epoch": 1.911220165068035, "grad_norm": 0.1751619428396225, "learning_rate": 5.905490367411475e-06, "loss": 0.4761, "step": 8568 }, { "epoch": 1.9114432299799242, "grad_norm": 0.1781492680311203, "learning_rate": 5.903343371745087e-06, "loss": 0.4393, "step": 8569 }, { "epoch": 1.9116662948918135, "grad_norm": 0.1823146939277649, "learning_rate": 5.9011966029773105e-06, "loss": 0.4438, "step": 8570 }, { "epoch": 1.9118893598037028, "grad_norm": 0.1732335388660431, "learning_rate": 5.89905006122705e-06, "loss": 0.4574, "step": 8571 }, { "epoch": 1.9121124247155923, "grad_norm": 0.1782243847846985, "learning_rate": 5.896903746613191e-06, "loss": 0.448, "step": 8572 }, { "epoch": 1.9123354896274816, "grad_norm": 0.16777978837490082, "learning_rate": 5.894757659254614e-06, "loss": 0.4136, "step": 8573 }, { "epoch": 1.912558554539371, "grad_norm": 0.18306882679462433, "learning_rate": 5.892611799270179e-06, "loss": 0.472, "step": 8574 }, { "epoch": 1.9127816194512604, "grad_norm": 0.19164350628852844, "learning_rate": 5.890466166778741e-06, "loss": 0.4304, "step": 8575 }, { "epoch": 1.9130046843631496, "grad_norm": 0.18419405817985535, "learning_rate": 5.888320761899136e-06, "loss": 0.4458, "step": 8576 }, { "epoch": 1.913227749275039, "grad_norm": 0.1842220425605774, "learning_rate": 5.886175584750191e-06, "loss": 0.4658, "step": 8577 }, { "epoch": 1.9134508141869284, "grad_norm": 0.18262866139411926, "learning_rate": 5.884030635450717e-06, "loss": 0.4389, "step": 8578 }, { "epoch": 1.913673879098818, "grad_norm": 0.17716680467128754, "learning_rate": 5.88188591411952e-06, "loss": 0.4465, "step": 8579 }, { "epoch": 1.9138969440107072, "grad_norm": 0.18368251621723175, "learning_rate": 5.879741420875382e-06, "loss": 0.4687, "step": 8580 }, { "epoch": 1.9141200089225965, "grad_norm": 0.18374212086200714, "learning_rate": 5.877597155837082e-06, "loss": 0.4255, "step": 8581 }, { "epoch": 1.9143430738344858, "grad_norm": 0.2865939736366272, "learning_rate": 5.8754531191233845e-06, "loss": 0.4244, "step": 8582 }, { "epoch": 1.914566138746375, "grad_norm": 0.16909492015838623, "learning_rate": 5.873309310853037e-06, "loss": 0.4432, "step": 8583 }, { "epoch": 1.9147892036582645, "grad_norm": 0.18193134665489197, "learning_rate": 5.8711657311447775e-06, "loss": 0.4461, "step": 8584 }, { "epoch": 1.915012268570154, "grad_norm": 0.1864008605480194, "learning_rate": 5.8690223801173305e-06, "loss": 0.4525, "step": 8585 }, { "epoch": 1.9152353334820433, "grad_norm": 0.17558574676513672, "learning_rate": 5.866879257889411e-06, "loss": 0.4376, "step": 8586 }, { "epoch": 1.9154583983939326, "grad_norm": 0.18040230870246887, "learning_rate": 5.864736364579714e-06, "loss": 0.4505, "step": 8587 }, { "epoch": 1.915681463305822, "grad_norm": 0.1805705428123474, "learning_rate": 5.862593700306931e-06, "loss": 0.4497, "step": 8588 }, { "epoch": 1.9159045282177114, "grad_norm": 0.17601102590560913, "learning_rate": 5.860451265189733e-06, "loss": 0.4287, "step": 8589 }, { "epoch": 1.9161275931296007, "grad_norm": 0.19040507078170776, "learning_rate": 5.858309059346784e-06, "loss": 0.4736, "step": 8590 }, { "epoch": 1.9163506580414902, "grad_norm": 0.18196603655815125, "learning_rate": 5.85616708289673e-06, "loss": 0.4527, "step": 8591 }, { "epoch": 1.9165737229533795, "grad_norm": 0.19250567257404327, "learning_rate": 5.85402533595821e-06, "loss": 0.4137, "step": 8592 }, { "epoch": 1.9167967878652687, "grad_norm": 0.1818055659532547, "learning_rate": 5.851883818649845e-06, "loss": 0.4603, "step": 8593 }, { "epoch": 1.917019852777158, "grad_norm": 0.21804414689540863, "learning_rate": 5.8497425310902474e-06, "loss": 0.4716, "step": 8594 }, { "epoch": 1.9172429176890475, "grad_norm": 0.19273175299167633, "learning_rate": 5.847601473398014e-06, "loss": 0.4345, "step": 8595 }, { "epoch": 1.917465982600937, "grad_norm": 0.18502821028232574, "learning_rate": 5.845460645691732e-06, "loss": 0.4634, "step": 8596 }, { "epoch": 1.9176890475128263, "grad_norm": 0.18303586542606354, "learning_rate": 5.843320048089971e-06, "loss": 0.4813, "step": 8597 }, { "epoch": 1.9179121124247156, "grad_norm": 0.18128132820129395, "learning_rate": 5.841179680711294e-06, "loss": 0.4677, "step": 8598 }, { "epoch": 1.9181351773366049, "grad_norm": 0.17710062861442566, "learning_rate": 5.839039543674245e-06, "loss": 0.4321, "step": 8599 }, { "epoch": 1.9183582422484942, "grad_norm": 0.187991201877594, "learning_rate": 5.836899637097358e-06, "loss": 0.4803, "step": 8600 }, { "epoch": 1.9185813071603837, "grad_norm": 0.17623665928840637, "learning_rate": 5.834759961099157e-06, "loss": 0.4064, "step": 8601 }, { "epoch": 1.9188043720722732, "grad_norm": 0.17650528252124786, "learning_rate": 5.832620515798154e-06, "loss": 0.463, "step": 8602 }, { "epoch": 1.9190274369841624, "grad_norm": 0.17875365912914276, "learning_rate": 5.830481301312836e-06, "loss": 0.4543, "step": 8603 }, { "epoch": 1.9192505018960517, "grad_norm": 0.2137259989976883, "learning_rate": 5.828342317761692e-06, "loss": 0.4586, "step": 8604 }, { "epoch": 1.919473566807941, "grad_norm": 0.18043895065784454, "learning_rate": 5.8262035652631896e-06, "loss": 0.4573, "step": 8605 }, { "epoch": 1.9196966317198305, "grad_norm": 0.18645495176315308, "learning_rate": 5.824065043935794e-06, "loss": 0.4812, "step": 8606 }, { "epoch": 1.9199196966317198, "grad_norm": 0.18367193639278412, "learning_rate": 5.821926753897942e-06, "loss": 0.4644, "step": 8607 }, { "epoch": 1.9201427615436093, "grad_norm": 0.17733469605445862, "learning_rate": 5.819788695268065e-06, "loss": 0.4581, "step": 8608 }, { "epoch": 1.9203658264554986, "grad_norm": 0.17940470576286316, "learning_rate": 5.817650868164584e-06, "loss": 0.4452, "step": 8609 }, { "epoch": 1.9205888913673879, "grad_norm": 0.1958422213792801, "learning_rate": 5.815513272705905e-06, "loss": 0.47, "step": 8610 }, { "epoch": 1.9208119562792771, "grad_norm": 0.21605324745178223, "learning_rate": 5.813375909010427e-06, "loss": 0.4444, "step": 8611 }, { "epoch": 1.9210350211911666, "grad_norm": 0.18394535779953003, "learning_rate": 5.811238777196522e-06, "loss": 0.4527, "step": 8612 }, { "epoch": 1.9212580861030562, "grad_norm": 0.17584146559238434, "learning_rate": 5.809101877382562e-06, "loss": 0.4181, "step": 8613 }, { "epoch": 1.9214811510149454, "grad_norm": 0.1733761727809906, "learning_rate": 5.806965209686903e-06, "loss": 0.4431, "step": 8614 }, { "epoch": 1.9217042159268347, "grad_norm": 0.17333033680915833, "learning_rate": 5.8048287742278886e-06, "loss": 0.4301, "step": 8615 }, { "epoch": 1.921927280838724, "grad_norm": 0.17815978825092316, "learning_rate": 5.802692571123843e-06, "loss": 0.4417, "step": 8616 }, { "epoch": 1.9221503457506133, "grad_norm": 0.17262916266918182, "learning_rate": 5.8005566004930826e-06, "loss": 0.424, "step": 8617 }, { "epoch": 1.9223734106625028, "grad_norm": 0.1765177696943283, "learning_rate": 5.798420862453914e-06, "loss": 0.4553, "step": 8618 }, { "epoch": 1.9225964755743923, "grad_norm": 0.18109013140201569, "learning_rate": 5.796285357124632e-06, "loss": 0.4362, "step": 8619 }, { "epoch": 1.9228195404862816, "grad_norm": 0.2021632194519043, "learning_rate": 5.7941500846235045e-06, "loss": 0.4319, "step": 8620 }, { "epoch": 1.9230426053981708, "grad_norm": 0.18348877131938934, "learning_rate": 5.7920150450688e-06, "loss": 0.4166, "step": 8621 }, { "epoch": 1.9232656703100601, "grad_norm": 0.18288786709308624, "learning_rate": 5.789880238578773e-06, "loss": 0.4756, "step": 8622 }, { "epoch": 1.9234887352219496, "grad_norm": 0.2258935421705246, "learning_rate": 5.787745665271664e-06, "loss": 0.4458, "step": 8623 }, { "epoch": 1.923711800133839, "grad_norm": 0.19700488448143005, "learning_rate": 5.785611325265694e-06, "loss": 0.4566, "step": 8624 }, { "epoch": 1.9239348650457284, "grad_norm": 0.177838996052742, "learning_rate": 5.783477218679078e-06, "loss": 0.4348, "step": 8625 }, { "epoch": 1.9241579299576177, "grad_norm": 0.18436270952224731, "learning_rate": 5.781343345630016e-06, "loss": 0.4323, "step": 8626 }, { "epoch": 1.924380994869507, "grad_norm": 0.18075759708881378, "learning_rate": 5.779209706236696e-06, "loss": 0.4355, "step": 8627 }, { "epoch": 1.9246040597813963, "grad_norm": 0.18219725787639618, "learning_rate": 5.7770763006172966e-06, "loss": 0.4712, "step": 8628 }, { "epoch": 1.9248271246932858, "grad_norm": 0.18282842636108398, "learning_rate": 5.77494312888997e-06, "loss": 0.4449, "step": 8629 }, { "epoch": 1.9250501896051753, "grad_norm": 0.19060496985912323, "learning_rate": 5.77281019117287e-06, "loss": 0.4629, "step": 8630 }, { "epoch": 1.9252732545170645, "grad_norm": 0.1835882067680359, "learning_rate": 5.770677487584133e-06, "loss": 0.4504, "step": 8631 }, { "epoch": 1.9254963194289538, "grad_norm": 0.19525527954101562, "learning_rate": 5.7685450182418825e-06, "loss": 0.4748, "step": 8632 }, { "epoch": 1.925719384340843, "grad_norm": 0.18883006274700165, "learning_rate": 5.7664127832642225e-06, "loss": 0.4342, "step": 8633 }, { "epoch": 1.9259424492527324, "grad_norm": 0.18440766632556915, "learning_rate": 5.764280782769254e-06, "loss": 0.4448, "step": 8634 }, { "epoch": 1.926165514164622, "grad_norm": 0.18678854405879974, "learning_rate": 5.762149016875057e-06, "loss": 0.4417, "step": 8635 }, { "epoch": 1.9263885790765114, "grad_norm": 0.1747465282678604, "learning_rate": 5.760017485699711e-06, "loss": 0.442, "step": 8636 }, { "epoch": 1.9266116439884007, "grad_norm": 0.17557400465011597, "learning_rate": 5.757886189361261e-06, "loss": 0.4598, "step": 8637 }, { "epoch": 1.92683470890029, "grad_norm": 0.1763993501663208, "learning_rate": 5.755755127977759e-06, "loss": 0.4279, "step": 8638 }, { "epoch": 1.9270577738121792, "grad_norm": 0.18249794840812683, "learning_rate": 5.753624301667235e-06, "loss": 0.4529, "step": 8639 }, { "epoch": 1.9272808387240687, "grad_norm": 0.16952653229236603, "learning_rate": 5.751493710547713e-06, "loss": 0.4208, "step": 8640 }, { "epoch": 1.927503903635958, "grad_norm": 0.1794244945049286, "learning_rate": 5.749363354737188e-06, "loss": 0.456, "step": 8641 }, { "epoch": 1.9277269685478475, "grad_norm": 0.1846603900194168, "learning_rate": 5.74723323435366e-06, "loss": 0.4559, "step": 8642 }, { "epoch": 1.9279500334597368, "grad_norm": 0.18191924691200256, "learning_rate": 5.745103349515104e-06, "loss": 0.4466, "step": 8643 }, { "epoch": 1.928173098371626, "grad_norm": 0.1772155612707138, "learning_rate": 5.742973700339494e-06, "loss": 0.4005, "step": 8644 }, { "epoch": 1.9283961632835154, "grad_norm": 0.17686043679714203, "learning_rate": 5.740844286944775e-06, "loss": 0.4279, "step": 8645 }, { "epoch": 1.9286192281954049, "grad_norm": 0.19069218635559082, "learning_rate": 5.738715109448889e-06, "loss": 0.4725, "step": 8646 }, { "epoch": 1.9288422931072944, "grad_norm": 0.17691883444786072, "learning_rate": 5.736586167969767e-06, "loss": 0.4383, "step": 8647 }, { "epoch": 1.9290653580191837, "grad_norm": 0.1703954041004181, "learning_rate": 5.734457462625318e-06, "loss": 0.4225, "step": 8648 }, { "epoch": 1.929288422931073, "grad_norm": 0.17984159290790558, "learning_rate": 5.732328993533452e-06, "loss": 0.4681, "step": 8649 }, { "epoch": 1.9295114878429622, "grad_norm": 0.18141353130340576, "learning_rate": 5.730200760812045e-06, "loss": 0.4654, "step": 8650 }, { "epoch": 1.9297345527548515, "grad_norm": 0.19043731689453125, "learning_rate": 5.72807276457898e-06, "loss": 0.4443, "step": 8651 }, { "epoch": 1.929957617666741, "grad_norm": 0.6355077624320984, "learning_rate": 5.725945004952118e-06, "loss": 0.4525, "step": 8652 }, { "epoch": 1.9301806825786305, "grad_norm": 0.17925289273262024, "learning_rate": 5.723817482049307e-06, "loss": 0.4438, "step": 8653 }, { "epoch": 1.9304037474905198, "grad_norm": 0.1707497388124466, "learning_rate": 5.721690195988381e-06, "loss": 0.4078, "step": 8654 }, { "epoch": 1.930626812402409, "grad_norm": 0.1779852956533432, "learning_rate": 5.719563146887163e-06, "loss": 0.4304, "step": 8655 }, { "epoch": 1.9308498773142984, "grad_norm": 0.18374204635620117, "learning_rate": 5.717436334863462e-06, "loss": 0.4503, "step": 8656 }, { "epoch": 1.9310729422261879, "grad_norm": 0.18949589133262634, "learning_rate": 5.715309760035079e-06, "loss": 0.4901, "step": 8657 }, { "epoch": 1.9312960071380771, "grad_norm": 0.1836424320936203, "learning_rate": 5.71318342251979e-06, "loss": 0.462, "step": 8658 }, { "epoch": 1.9315190720499666, "grad_norm": 0.1780422180891037, "learning_rate": 5.711057322435369e-06, "loss": 0.4386, "step": 8659 }, { "epoch": 1.931742136961856, "grad_norm": 0.17757810652256012, "learning_rate": 5.708931459899571e-06, "loss": 0.4421, "step": 8660 }, { "epoch": 1.9319652018737452, "grad_norm": 0.1806020736694336, "learning_rate": 5.7068058350301445e-06, "loss": 0.4349, "step": 8661 }, { "epoch": 1.9321882667856345, "grad_norm": 0.1809801310300827, "learning_rate": 5.704680447944812e-06, "loss": 0.4557, "step": 8662 }, { "epoch": 1.932411331697524, "grad_norm": 0.22528956830501556, "learning_rate": 5.702555298761297e-06, "loss": 0.4204, "step": 8663 }, { "epoch": 1.9326343966094135, "grad_norm": 0.17288967967033386, "learning_rate": 5.700430387597299e-06, "loss": 0.4379, "step": 8664 }, { "epoch": 1.9328574615213028, "grad_norm": 0.18082986772060394, "learning_rate": 5.698305714570517e-06, "loss": 0.4727, "step": 8665 }, { "epoch": 1.933080526433192, "grad_norm": 0.17780068516731262, "learning_rate": 5.696181279798618e-06, "loss": 0.4321, "step": 8666 }, { "epoch": 1.9333035913450813, "grad_norm": 0.17463639378547668, "learning_rate": 5.694057083399272e-06, "loss": 0.4647, "step": 8667 }, { "epoch": 1.9335266562569706, "grad_norm": 0.18342427909374237, "learning_rate": 5.691933125490131e-06, "loss": 0.434, "step": 8668 }, { "epoch": 1.9337497211688601, "grad_norm": 0.1869836300611496, "learning_rate": 5.689809406188832e-06, "loss": 0.4545, "step": 8669 }, { "epoch": 1.9339727860807496, "grad_norm": 0.19317130744457245, "learning_rate": 5.687685925613005e-06, "loss": 0.4475, "step": 8670 }, { "epoch": 1.934195850992639, "grad_norm": 0.18191368877887726, "learning_rate": 5.685562683880252e-06, "loss": 0.4646, "step": 8671 }, { "epoch": 1.9344189159045282, "grad_norm": 0.1778983622789383, "learning_rate": 5.683439681108176e-06, "loss": 0.4122, "step": 8672 }, { "epoch": 1.9346419808164175, "grad_norm": 0.18883691728115082, "learning_rate": 5.681316917414363e-06, "loss": 0.4493, "step": 8673 }, { "epoch": 1.934865045728307, "grad_norm": 0.1791423112154007, "learning_rate": 5.6791943929163895e-06, "loss": 0.4312, "step": 8674 }, { "epoch": 1.9350881106401963, "grad_norm": 0.1763908416032791, "learning_rate": 5.677072107731805e-06, "loss": 0.4833, "step": 8675 }, { "epoch": 1.9353111755520858, "grad_norm": 0.17612230777740479, "learning_rate": 5.6749500619781595e-06, "loss": 0.4412, "step": 8676 }, { "epoch": 1.935534240463975, "grad_norm": 0.1743626892566681, "learning_rate": 5.672828255772986e-06, "loss": 0.4195, "step": 8677 }, { "epoch": 1.9357573053758643, "grad_norm": 0.18442870676517487, "learning_rate": 5.6707066892338054e-06, "loss": 0.4564, "step": 8678 }, { "epoch": 1.9359803702877536, "grad_norm": 0.2595793306827545, "learning_rate": 5.668585362478117e-06, "loss": 0.4738, "step": 8679 }, { "epoch": 1.936203435199643, "grad_norm": 0.19340401887893677, "learning_rate": 5.666464275623418e-06, "loss": 0.4409, "step": 8680 }, { "epoch": 1.9364265001115326, "grad_norm": 0.17830665409564972, "learning_rate": 5.664343428787185e-06, "loss": 0.4476, "step": 8681 }, { "epoch": 1.936649565023422, "grad_norm": 0.18472926318645477, "learning_rate": 5.66222282208689e-06, "loss": 0.4597, "step": 8682 }, { "epoch": 1.9368726299353112, "grad_norm": 0.18445588648319244, "learning_rate": 5.660102455639978e-06, "loss": 0.4724, "step": 8683 }, { "epoch": 1.9370956948472005, "grad_norm": 0.20936332643032074, "learning_rate": 5.657982329563889e-06, "loss": 0.4823, "step": 8684 }, { "epoch": 1.9373187597590897, "grad_norm": 0.17280444502830505, "learning_rate": 5.655862443976051e-06, "loss": 0.4476, "step": 8685 }, { "epoch": 1.9375418246709792, "grad_norm": 0.18051601946353912, "learning_rate": 5.653742798993877e-06, "loss": 0.4426, "step": 8686 }, { "epoch": 1.9377648895828687, "grad_norm": 0.18293160200119019, "learning_rate": 5.651623394734771e-06, "loss": 0.4589, "step": 8687 }, { "epoch": 1.937987954494758, "grad_norm": 0.18495197594165802, "learning_rate": 5.649504231316109e-06, "loss": 0.463, "step": 8688 }, { "epoch": 1.9382110194066473, "grad_norm": 0.18280479311943054, "learning_rate": 5.647385308855271e-06, "loss": 0.457, "step": 8689 }, { "epoch": 1.9384340843185366, "grad_norm": 0.1783073991537094, "learning_rate": 5.645266627469612e-06, "loss": 0.4672, "step": 8690 }, { "epoch": 1.938657149230426, "grad_norm": 0.18390284478664398, "learning_rate": 5.643148187276477e-06, "loss": 0.47, "step": 8691 }, { "epoch": 1.9388802141423154, "grad_norm": 0.176648810505867, "learning_rate": 5.641029988393208e-06, "loss": 0.4732, "step": 8692 }, { "epoch": 1.9391032790542049, "grad_norm": 0.19437986612319946, "learning_rate": 5.638912030937112e-06, "loss": 0.4631, "step": 8693 }, { "epoch": 1.9393263439660942, "grad_norm": 0.1740999072790146, "learning_rate": 5.636794315025499e-06, "loss": 0.4326, "step": 8694 }, { "epoch": 1.9395494088779834, "grad_norm": 0.17980170249938965, "learning_rate": 5.634676840775664e-06, "loss": 0.4373, "step": 8695 }, { "epoch": 1.9397724737898727, "grad_norm": 0.1797953099012375, "learning_rate": 5.632559608304886e-06, "loss": 0.4681, "step": 8696 }, { "epoch": 1.9399955387017622, "grad_norm": 0.17781023681163788, "learning_rate": 5.630442617730427e-06, "loss": 0.447, "step": 8697 }, { "epoch": 1.9402186036136517, "grad_norm": 0.19193176925182343, "learning_rate": 5.628325869169542e-06, "loss": 0.4399, "step": 8698 }, { "epoch": 1.940441668525541, "grad_norm": 0.18457980453968048, "learning_rate": 5.62620936273947e-06, "loss": 0.4717, "step": 8699 }, { "epoch": 1.9406647334374303, "grad_norm": 0.1887241154909134, "learning_rate": 5.624093098557437e-06, "loss": 0.4239, "step": 8700 }, { "epoch": 1.9408877983493196, "grad_norm": 0.18782730400562286, "learning_rate": 5.621977076740652e-06, "loss": 0.4417, "step": 8701 }, { "epoch": 1.9411108632612089, "grad_norm": 0.21468473970890045, "learning_rate": 5.619861297406315e-06, "loss": 0.4635, "step": 8702 }, { "epoch": 1.9413339281730984, "grad_norm": 0.18426984548568726, "learning_rate": 5.61774576067161e-06, "loss": 0.435, "step": 8703 }, { "epoch": 1.9415569930849879, "grad_norm": 0.17525528371334076, "learning_rate": 5.6156304666537166e-06, "loss": 0.4459, "step": 8704 }, { "epoch": 1.9417800579968771, "grad_norm": 0.17843464016914368, "learning_rate": 5.613515415469782e-06, "loss": 0.4455, "step": 8705 }, { "epoch": 1.9420031229087664, "grad_norm": 0.1760268360376358, "learning_rate": 5.6114006072369544e-06, "loss": 0.4394, "step": 8706 }, { "epoch": 1.9422261878206557, "grad_norm": 0.18088506162166595, "learning_rate": 5.609286042072366e-06, "loss": 0.4819, "step": 8707 }, { "epoch": 1.9424492527325452, "grad_norm": 0.17896127700805664, "learning_rate": 5.607171720093141e-06, "loss": 0.4474, "step": 8708 }, { "epoch": 1.9426723176444345, "grad_norm": 0.1753784865140915, "learning_rate": 5.605057641416374e-06, "loss": 0.4743, "step": 8709 }, { "epoch": 1.942895382556324, "grad_norm": 0.1875680536031723, "learning_rate": 5.60294380615916e-06, "loss": 0.4491, "step": 8710 }, { "epoch": 1.9431184474682133, "grad_norm": 0.18972131609916687, "learning_rate": 5.600830214438577e-06, "loss": 0.4332, "step": 8711 }, { "epoch": 1.9433415123801026, "grad_norm": 0.25385287404060364, "learning_rate": 5.598716866371688e-06, "loss": 0.4125, "step": 8712 }, { "epoch": 1.9435645772919918, "grad_norm": 0.19474051892757416, "learning_rate": 5.5966037620755475e-06, "loss": 0.4405, "step": 8713 }, { "epoch": 1.9437876422038813, "grad_norm": 0.18606369197368622, "learning_rate": 5.594490901667187e-06, "loss": 0.4364, "step": 8714 }, { "epoch": 1.9440107071157708, "grad_norm": 0.17978952825069427, "learning_rate": 5.592378285263631e-06, "loss": 0.446, "step": 8715 }, { "epoch": 1.9442337720276601, "grad_norm": 0.17850260436534882, "learning_rate": 5.5902659129818896e-06, "loss": 0.4761, "step": 8716 }, { "epoch": 1.9444568369395494, "grad_norm": 0.1837419867515564, "learning_rate": 5.588153784938966e-06, "loss": 0.4414, "step": 8717 }, { "epoch": 1.9446799018514387, "grad_norm": 0.18115438520908356, "learning_rate": 5.586041901251832e-06, "loss": 0.4248, "step": 8718 }, { "epoch": 1.944902966763328, "grad_norm": 0.18175376951694489, "learning_rate": 5.5839302620374625e-06, "loss": 0.4587, "step": 8719 }, { "epoch": 1.9451260316752175, "grad_norm": 0.18459230661392212, "learning_rate": 5.581818867412814e-06, "loss": 0.4576, "step": 8720 }, { "epoch": 1.945349096587107, "grad_norm": 0.17397940158843994, "learning_rate": 5.5797077174948325e-06, "loss": 0.4568, "step": 8721 }, { "epoch": 1.9455721614989963, "grad_norm": 0.18246999382972717, "learning_rate": 5.577596812400438e-06, "loss": 0.4431, "step": 8722 }, { "epoch": 1.9457952264108855, "grad_norm": 0.18297289311885834, "learning_rate": 5.57548615224655e-06, "loss": 0.4615, "step": 8723 }, { "epoch": 1.9460182913227748, "grad_norm": 0.17768624424934387, "learning_rate": 5.57337573715007e-06, "loss": 0.45, "step": 8724 }, { "epoch": 1.9462413562346643, "grad_norm": 0.17627879977226257, "learning_rate": 5.5712655672278905e-06, "loss": 0.4253, "step": 8725 }, { "epoch": 1.9464644211465536, "grad_norm": 0.1850820928812027, "learning_rate": 5.569155642596878e-06, "loss": 0.4427, "step": 8726 }, { "epoch": 1.946687486058443, "grad_norm": 0.2031596601009369, "learning_rate": 5.567045963373896e-06, "loss": 0.5089, "step": 8727 }, { "epoch": 1.9469105509703324, "grad_norm": 0.17893202602863312, "learning_rate": 5.564936529675793e-06, "loss": 0.4536, "step": 8728 }, { "epoch": 1.9471336158822217, "grad_norm": 0.19759956002235413, "learning_rate": 5.562827341619407e-06, "loss": 0.458, "step": 8729 }, { "epoch": 1.947356680794111, "grad_norm": 0.18113680183887482, "learning_rate": 5.560718399321549e-06, "loss": 0.4763, "step": 8730 }, { "epoch": 1.9475797457060005, "grad_norm": 0.1818569451570511, "learning_rate": 5.558609702899032e-06, "loss": 0.4484, "step": 8731 }, { "epoch": 1.94780281061789, "grad_norm": 0.1860044002532959, "learning_rate": 5.556501252468647e-06, "loss": 0.4312, "step": 8732 }, { "epoch": 1.9480258755297792, "grad_norm": 0.18196377158164978, "learning_rate": 5.554393048147172e-06, "loss": 0.4708, "step": 8733 }, { "epoch": 1.9482489404416685, "grad_norm": 0.18474991619586945, "learning_rate": 5.552285090051379e-06, "loss": 0.4675, "step": 8734 }, { "epoch": 1.9484720053535578, "grad_norm": 0.2845540940761566, "learning_rate": 5.55017737829801e-06, "loss": 0.4609, "step": 8735 }, { "epoch": 1.948695070265447, "grad_norm": 0.17991533875465393, "learning_rate": 5.548069913003808e-06, "loss": 0.4612, "step": 8736 }, { "epoch": 1.9489181351773366, "grad_norm": 0.1794772446155548, "learning_rate": 5.5459626942855e-06, "loss": 0.429, "step": 8737 }, { "epoch": 1.949141200089226, "grad_norm": 0.18330740928649902, "learning_rate": 5.543855722259797e-06, "loss": 0.4359, "step": 8738 }, { "epoch": 1.9493642650011154, "grad_norm": 0.17791210114955902, "learning_rate": 5.541748997043392e-06, "loss": 0.4532, "step": 8739 }, { "epoch": 1.9495873299130047, "grad_norm": 0.18846653401851654, "learning_rate": 5.5396425187529705e-06, "loss": 0.4784, "step": 8740 }, { "epoch": 1.949810394824894, "grad_norm": 0.18275344371795654, "learning_rate": 5.537536287505203e-06, "loss": 0.4651, "step": 8741 }, { "epoch": 1.9500334597367834, "grad_norm": 0.28151261806488037, "learning_rate": 5.5354303034167486e-06, "loss": 0.4302, "step": 8742 }, { "epoch": 1.9502565246486727, "grad_norm": 0.1811457872390747, "learning_rate": 5.533324566604246e-06, "loss": 0.4572, "step": 8743 }, { "epoch": 1.9504795895605622, "grad_norm": 0.18251028656959534, "learning_rate": 5.531219077184322e-06, "loss": 0.4651, "step": 8744 }, { "epoch": 1.9507026544724515, "grad_norm": 0.18917511403560638, "learning_rate": 5.529113835273599e-06, "loss": 0.431, "step": 8745 }, { "epoch": 1.9509257193843408, "grad_norm": 0.19956184923648834, "learning_rate": 5.527008840988676e-06, "loss": 0.4789, "step": 8746 }, { "epoch": 1.95114878429623, "grad_norm": 0.17988136410713196, "learning_rate": 5.5249040944461366e-06, "loss": 0.4427, "step": 8747 }, { "epoch": 1.9513718492081196, "grad_norm": 0.18015359342098236, "learning_rate": 5.522799595762558e-06, "loss": 0.4454, "step": 8748 }, { "epoch": 1.951594914120009, "grad_norm": 0.17955633997917175, "learning_rate": 5.5206953450545e-06, "loss": 0.4477, "step": 8749 }, { "epoch": 1.9518179790318984, "grad_norm": 0.18171581625938416, "learning_rate": 5.518591342438513e-06, "loss": 0.4489, "step": 8750 }, { "epoch": 1.9520410439437876, "grad_norm": 0.1831309199333191, "learning_rate": 5.516487588031124e-06, "loss": 0.4686, "step": 8751 }, { "epoch": 1.952264108855677, "grad_norm": 0.1874886304140091, "learning_rate": 5.514384081948855e-06, "loss": 0.4631, "step": 8752 }, { "epoch": 1.9524871737675662, "grad_norm": 0.19060033559799194, "learning_rate": 5.51228082430821e-06, "loss": 0.4694, "step": 8753 }, { "epoch": 1.9527102386794557, "grad_norm": 0.18917007744312286, "learning_rate": 5.510177815225683e-06, "loss": 0.4681, "step": 8754 }, { "epoch": 1.9529333035913452, "grad_norm": 0.1917768120765686, "learning_rate": 5.508075054817755e-06, "loss": 0.4628, "step": 8755 }, { "epoch": 1.9531563685032345, "grad_norm": 0.18906119465827942, "learning_rate": 5.505972543200879e-06, "loss": 0.4553, "step": 8756 }, { "epoch": 1.9533794334151238, "grad_norm": 0.18696992099285126, "learning_rate": 5.503870280491514e-06, "loss": 0.446, "step": 8757 }, { "epoch": 1.953602498327013, "grad_norm": 0.18778546154499054, "learning_rate": 5.501768266806095e-06, "loss": 0.4418, "step": 8758 }, { "epoch": 1.9538255632389026, "grad_norm": 0.182692751288414, "learning_rate": 5.4996665022610495e-06, "loss": 0.4688, "step": 8759 }, { "epoch": 1.9540486281507918, "grad_norm": 0.20348599553108215, "learning_rate": 5.497564986972775e-06, "loss": 0.4815, "step": 8760 }, { "epoch": 1.9542716930626813, "grad_norm": 0.1796850711107254, "learning_rate": 5.495463721057675e-06, "loss": 0.4688, "step": 8761 }, { "epoch": 1.9544947579745706, "grad_norm": 0.1841420829296112, "learning_rate": 5.493362704632127e-06, "loss": 0.4444, "step": 8762 }, { "epoch": 1.95471782288646, "grad_norm": 0.20517012476921082, "learning_rate": 5.491261937812504e-06, "loss": 0.4471, "step": 8763 }, { "epoch": 1.9549408877983492, "grad_norm": 0.1862691342830658, "learning_rate": 5.489161420715154e-06, "loss": 0.4669, "step": 8764 }, { "epoch": 1.9551639527102387, "grad_norm": 0.1903495490550995, "learning_rate": 5.487061153456418e-06, "loss": 0.4476, "step": 8765 }, { "epoch": 1.9553870176221282, "grad_norm": 0.17540110647678375, "learning_rate": 5.484961136152622e-06, "loss": 0.4416, "step": 8766 }, { "epoch": 1.9556100825340175, "grad_norm": 0.18369555473327637, "learning_rate": 5.482861368920084e-06, "loss": 0.4645, "step": 8767 }, { "epoch": 1.9558331474459068, "grad_norm": 0.17829713225364685, "learning_rate": 5.4807618518750925e-06, "loss": 0.4156, "step": 8768 }, { "epoch": 1.956056212357796, "grad_norm": 0.17648525536060333, "learning_rate": 5.478662585133938e-06, "loss": 0.4493, "step": 8769 }, { "epoch": 1.9562792772696853, "grad_norm": 0.18088924884796143, "learning_rate": 5.476563568812893e-06, "loss": 0.455, "step": 8770 }, { "epoch": 1.9565023421815748, "grad_norm": 0.18878251314163208, "learning_rate": 5.474464803028206e-06, "loss": 0.4492, "step": 8771 }, { "epoch": 1.9567254070934643, "grad_norm": 0.19534710049629211, "learning_rate": 5.472366287896132e-06, "loss": 0.4368, "step": 8772 }, { "epoch": 1.9569484720053536, "grad_norm": 0.1891869604587555, "learning_rate": 5.470268023532885e-06, "loss": 0.4573, "step": 8773 }, { "epoch": 1.9571715369172429, "grad_norm": 0.1803818792104721, "learning_rate": 5.468170010054691e-06, "loss": 0.4338, "step": 8774 }, { "epoch": 1.9573946018291322, "grad_norm": 0.17830346524715424, "learning_rate": 5.4660722475777475e-06, "loss": 0.4367, "step": 8775 }, { "epoch": 1.9576176667410217, "grad_norm": 0.1889098584651947, "learning_rate": 5.463974736218243e-06, "loss": 0.4494, "step": 8776 }, { "epoch": 1.957840731652911, "grad_norm": 0.1826130598783493, "learning_rate": 5.461877476092352e-06, "loss": 0.4531, "step": 8777 }, { "epoch": 1.9580637965648005, "grad_norm": 0.17219486832618713, "learning_rate": 5.4597804673162315e-06, "loss": 0.4272, "step": 8778 }, { "epoch": 1.9582868614766897, "grad_norm": 0.18349269032478333, "learning_rate": 5.457683710006026e-06, "loss": 0.4443, "step": 8779 }, { "epoch": 1.958509926388579, "grad_norm": 0.1871849000453949, "learning_rate": 5.455587204277868e-06, "loss": 0.4588, "step": 8780 }, { "epoch": 1.9587329913004683, "grad_norm": 0.18738840520381927, "learning_rate": 5.453490950247882e-06, "loss": 0.478, "step": 8781 }, { "epoch": 1.9589560562123578, "grad_norm": 0.17778688669204712, "learning_rate": 5.45139494803216e-06, "loss": 0.428, "step": 8782 }, { "epoch": 1.9591791211242473, "grad_norm": 0.18447647988796234, "learning_rate": 5.4492991977468e-06, "loss": 0.4422, "step": 8783 }, { "epoch": 1.9594021860361366, "grad_norm": 0.19066467881202698, "learning_rate": 5.4472036995078734e-06, "loss": 0.4328, "step": 8784 }, { "epoch": 1.9596252509480259, "grad_norm": 0.171788290143013, "learning_rate": 5.445108453431449e-06, "loss": 0.428, "step": 8785 }, { "epoch": 1.9598483158599151, "grad_norm": 0.1862727701663971, "learning_rate": 5.443013459633566e-06, "loss": 0.4497, "step": 8786 }, { "epoch": 1.9600713807718046, "grad_norm": 0.19083817303180695, "learning_rate": 5.440918718230263e-06, "loss": 0.4435, "step": 8787 }, { "epoch": 1.960294445683694, "grad_norm": 0.18043813109397888, "learning_rate": 5.438824229337558e-06, "loss": 0.4386, "step": 8788 }, { "epoch": 1.9605175105955834, "grad_norm": 0.17163397371768951, "learning_rate": 5.4367299930714615e-06, "loss": 0.4406, "step": 8789 }, { "epoch": 1.9607405755074727, "grad_norm": 0.17972704768180847, "learning_rate": 5.43463600954796e-06, "loss": 0.4606, "step": 8790 }, { "epoch": 1.960963640419362, "grad_norm": 0.1928553283214569, "learning_rate": 5.432542278883031e-06, "loss": 0.4587, "step": 8791 }, { "epoch": 1.9611867053312513, "grad_norm": 0.18378639221191406, "learning_rate": 5.430448801192642e-06, "loss": 0.4554, "step": 8792 }, { "epoch": 1.9614097702431408, "grad_norm": 0.18218287825584412, "learning_rate": 5.428355576592748e-06, "loss": 0.4634, "step": 8793 }, { "epoch": 1.96163283515503, "grad_norm": 0.18878161907196045, "learning_rate": 5.426262605199273e-06, "loss": 0.4567, "step": 8794 }, { "epoch": 1.9618559000669196, "grad_norm": 0.17760403454303741, "learning_rate": 5.424169887128145e-06, "loss": 0.4414, "step": 8795 }, { "epoch": 1.9620789649788088, "grad_norm": 0.18763794004917145, "learning_rate": 5.4220774224952735e-06, "loss": 0.4499, "step": 8796 }, { "epoch": 1.9623020298906981, "grad_norm": 0.18167226016521454, "learning_rate": 5.419985211416548e-06, "loss": 0.4325, "step": 8797 }, { "epoch": 1.9625250948025874, "grad_norm": 0.18740572035312653, "learning_rate": 5.417893254007857e-06, "loss": 0.453, "step": 8798 }, { "epoch": 1.962748159714477, "grad_norm": 0.18634778261184692, "learning_rate": 5.415801550385056e-06, "loss": 0.4658, "step": 8799 }, { "epoch": 1.9629712246263664, "grad_norm": 0.1778239905834198, "learning_rate": 5.4137101006639994e-06, "loss": 0.4438, "step": 8800 }, { "epoch": 1.9631942895382557, "grad_norm": 0.17299148440361023, "learning_rate": 5.411618904960527e-06, "loss": 0.4038, "step": 8801 }, { "epoch": 1.963417354450145, "grad_norm": 0.17823709547519684, "learning_rate": 5.4095279633904665e-06, "loss": 0.412, "step": 8802 }, { "epoch": 1.9636404193620343, "grad_norm": 0.18503381311893463, "learning_rate": 5.407437276069618e-06, "loss": 0.4553, "step": 8803 }, { "epoch": 1.9638634842739238, "grad_norm": 0.17986641824245453, "learning_rate": 5.4053468431137814e-06, "loss": 0.4714, "step": 8804 }, { "epoch": 1.964086549185813, "grad_norm": 0.17366492748260498, "learning_rate": 5.403256664638738e-06, "loss": 0.4264, "step": 8805 }, { "epoch": 1.9643096140977025, "grad_norm": 0.18994669616222382, "learning_rate": 5.40116674076026e-06, "loss": 0.447, "step": 8806 }, { "epoch": 1.9645326790095918, "grad_norm": 0.17680832743644714, "learning_rate": 5.3990770715940895e-06, "loss": 0.4308, "step": 8807 }, { "epoch": 1.9647557439214811, "grad_norm": 0.17399604618549347, "learning_rate": 5.396987657255974e-06, "loss": 0.4405, "step": 8808 }, { "epoch": 1.9649788088333704, "grad_norm": 0.21814000606536865, "learning_rate": 5.3948984978616355e-06, "loss": 0.4643, "step": 8809 }, { "epoch": 1.96520187374526, "grad_norm": 0.17908701300621033, "learning_rate": 5.39280959352679e-06, "loss": 0.4189, "step": 8810 }, { "epoch": 1.9654249386571492, "grad_norm": 0.17751288414001465, "learning_rate": 5.390720944367125e-06, "loss": 0.4502, "step": 8811 }, { "epoch": 1.9656480035690387, "grad_norm": 0.21767963469028473, "learning_rate": 5.388632550498327e-06, "loss": 0.4495, "step": 8812 }, { "epoch": 1.965871068480928, "grad_norm": 0.1864757090806961, "learning_rate": 5.386544412036066e-06, "loss": 0.4775, "step": 8813 }, { "epoch": 1.9660941333928172, "grad_norm": 0.17667576670646667, "learning_rate": 5.384456529095999e-06, "loss": 0.4515, "step": 8814 }, { "epoch": 1.9663171983047065, "grad_norm": 0.18594609200954437, "learning_rate": 5.382368901793759e-06, "loss": 0.4855, "step": 8815 }, { "epoch": 1.966540263216596, "grad_norm": 0.1851484477519989, "learning_rate": 5.380281530244974e-06, "loss": 0.417, "step": 8816 }, { "epoch": 1.9667633281284855, "grad_norm": 0.18481191992759705, "learning_rate": 5.378194414565258e-06, "loss": 0.4504, "step": 8817 }, { "epoch": 1.9669863930403748, "grad_norm": 0.17994412779808044, "learning_rate": 5.376107554870207e-06, "loss": 0.4565, "step": 8818 }, { "epoch": 1.967209457952264, "grad_norm": 0.17830024659633636, "learning_rate": 5.374020951275409e-06, "loss": 0.4562, "step": 8819 }, { "epoch": 1.9674325228641534, "grad_norm": 0.19198620319366455, "learning_rate": 5.371934603896426e-06, "loss": 0.4414, "step": 8820 }, { "epoch": 1.9676555877760429, "grad_norm": 0.17319843173027039, "learning_rate": 5.369848512848816e-06, "loss": 0.4351, "step": 8821 }, { "epoch": 1.9678786526879322, "grad_norm": 0.1835477352142334, "learning_rate": 5.3677626782481205e-06, "loss": 0.4447, "step": 8822 }, { "epoch": 1.9681017175998217, "grad_norm": 0.17269068956375122, "learning_rate": 5.365677100209868e-06, "loss": 0.4313, "step": 8823 }, { "epoch": 1.968324782511711, "grad_norm": 0.17792171239852905, "learning_rate": 5.363591778849566e-06, "loss": 0.4276, "step": 8824 }, { "epoch": 1.9685478474236002, "grad_norm": 0.17253033816814423, "learning_rate": 5.361506714282716e-06, "loss": 0.4399, "step": 8825 }, { "epoch": 1.9687709123354895, "grad_norm": 0.17550261318683624, "learning_rate": 5.359421906624802e-06, "loss": 0.4381, "step": 8826 }, { "epoch": 1.968993977247379, "grad_norm": 0.18501095473766327, "learning_rate": 5.357337355991296e-06, "loss": 0.4414, "step": 8827 }, { "epoch": 1.9692170421592683, "grad_norm": 0.17807962000370026, "learning_rate": 5.3552530624976475e-06, "loss": 0.4627, "step": 8828 }, { "epoch": 1.9694401070711578, "grad_norm": 0.17999139428138733, "learning_rate": 5.3531690262593e-06, "loss": 0.446, "step": 8829 }, { "epoch": 1.969663171983047, "grad_norm": 0.17978177964687347, "learning_rate": 5.351085247391681e-06, "loss": 0.4616, "step": 8830 }, { "epoch": 1.9698862368949364, "grad_norm": 0.18131205439567566, "learning_rate": 5.34900172601021e-06, "loss": 0.4418, "step": 8831 }, { "epoch": 1.9701093018068256, "grad_norm": 0.17810958623886108, "learning_rate": 5.3469184622302725e-06, "loss": 0.4492, "step": 8832 }, { "epoch": 1.9703323667187151, "grad_norm": 0.1817857027053833, "learning_rate": 5.344835456167262e-06, "loss": 0.4706, "step": 8833 }, { "epoch": 1.9705554316306046, "grad_norm": 0.18190069496631622, "learning_rate": 5.342752707936546e-06, "loss": 0.4614, "step": 8834 }, { "epoch": 1.970778496542494, "grad_norm": 0.18769033253192902, "learning_rate": 5.340670217653483e-06, "loss": 0.4537, "step": 8835 }, { "epoch": 1.9710015614543832, "grad_norm": 0.18518251180648804, "learning_rate": 5.338587985433409e-06, "loss": 0.4551, "step": 8836 }, { "epoch": 1.9712246263662725, "grad_norm": 0.1764073669910431, "learning_rate": 5.336506011391653e-06, "loss": 0.4309, "step": 8837 }, { "epoch": 1.971447691278162, "grad_norm": 0.3080836534500122, "learning_rate": 5.334424295643529e-06, "loss": 0.4195, "step": 8838 }, { "epoch": 1.9716707561900513, "grad_norm": 0.18763823807239532, "learning_rate": 5.332342838304335e-06, "loss": 0.4559, "step": 8839 }, { "epoch": 1.9718938211019408, "grad_norm": 0.19015417993068695, "learning_rate": 5.330261639489359e-06, "loss": 0.4487, "step": 8840 }, { "epoch": 1.97211688601383, "grad_norm": 0.18165238201618195, "learning_rate": 5.328180699313864e-06, "loss": 0.4439, "step": 8841 }, { "epoch": 1.9723399509257193, "grad_norm": 0.28610339760780334, "learning_rate": 5.326100017893107e-06, "loss": 0.4331, "step": 8842 }, { "epoch": 1.9725630158376086, "grad_norm": 0.1734454482793808, "learning_rate": 5.324019595342334e-06, "loss": 0.4424, "step": 8843 }, { "epoch": 1.9727860807494981, "grad_norm": 0.18375049531459808, "learning_rate": 5.32193943177677e-06, "loss": 0.4364, "step": 8844 }, { "epoch": 1.9730091456613874, "grad_norm": 0.1872616410255432, "learning_rate": 5.319859527311624e-06, "loss": 0.4591, "step": 8845 }, { "epoch": 1.973232210573277, "grad_norm": 0.18444837629795074, "learning_rate": 5.3177798820620975e-06, "loss": 0.4462, "step": 8846 }, { "epoch": 1.9734552754851662, "grad_norm": 0.1809314340353012, "learning_rate": 5.315700496143372e-06, "loss": 0.4382, "step": 8847 }, { "epoch": 1.9736783403970555, "grad_norm": 0.18612340092658997, "learning_rate": 5.313621369670623e-06, "loss": 0.4391, "step": 8848 }, { "epoch": 1.9739014053089448, "grad_norm": 0.18118642270565033, "learning_rate": 5.311542502758996e-06, "loss": 0.4659, "step": 8849 }, { "epoch": 1.9741244702208343, "grad_norm": 0.19646091759204865, "learning_rate": 5.309463895523638e-06, "loss": 0.4594, "step": 8850 }, { "epoch": 1.9743475351327238, "grad_norm": 0.18082433938980103, "learning_rate": 5.3073855480796735e-06, "loss": 0.472, "step": 8851 }, { "epoch": 1.974570600044613, "grad_norm": 0.17272797226905823, "learning_rate": 5.305307460542219e-06, "loss": 0.4375, "step": 8852 }, { "epoch": 1.9747936649565023, "grad_norm": 0.19139288365840912, "learning_rate": 5.303229633026368e-06, "loss": 0.459, "step": 8853 }, { "epoch": 1.9750167298683916, "grad_norm": 0.18878120183944702, "learning_rate": 5.3011520656472e-06, "loss": 0.4701, "step": 8854 }, { "epoch": 1.975239794780281, "grad_norm": 0.1769939512014389, "learning_rate": 5.299074758519788e-06, "loss": 0.4322, "step": 8855 }, { "epoch": 1.9754628596921704, "grad_norm": 0.1793774664402008, "learning_rate": 5.296997711759186e-06, "loss": 0.4352, "step": 8856 }, { "epoch": 1.97568592460406, "grad_norm": 0.17794568836688995, "learning_rate": 5.2949209254804366e-06, "loss": 0.4493, "step": 8857 }, { "epoch": 1.9759089895159492, "grad_norm": 0.185879185795784, "learning_rate": 5.292844399798559e-06, "loss": 0.4612, "step": 8858 }, { "epoch": 1.9761320544278385, "grad_norm": 0.18604058027267456, "learning_rate": 5.290768134828568e-06, "loss": 0.4597, "step": 8859 }, { "epoch": 1.9763551193397277, "grad_norm": 0.19229385256767273, "learning_rate": 5.28869213068546e-06, "loss": 0.4597, "step": 8860 }, { "epoch": 1.9765781842516172, "grad_norm": 0.1805671751499176, "learning_rate": 5.286616387484217e-06, "loss": 0.4478, "step": 8861 }, { "epoch": 1.9768012491635067, "grad_norm": 0.18257898092269897, "learning_rate": 5.28454090533981e-06, "loss": 0.4505, "step": 8862 }, { "epoch": 1.977024314075396, "grad_norm": 0.20359763503074646, "learning_rate": 5.2824656843671865e-06, "loss": 0.4693, "step": 8863 }, { "epoch": 1.9772473789872853, "grad_norm": 0.1799335926771164, "learning_rate": 5.280390724681288e-06, "loss": 0.414, "step": 8864 }, { "epoch": 1.9774704438991746, "grad_norm": 0.17781312763690948, "learning_rate": 5.278316026397037e-06, "loss": 0.4495, "step": 8865 }, { "epoch": 1.9776935088110639, "grad_norm": 0.18418540060520172, "learning_rate": 5.276241589629349e-06, "loss": 0.4328, "step": 8866 }, { "epoch": 1.9779165737229534, "grad_norm": 0.1901324838399887, "learning_rate": 5.274167414493113e-06, "loss": 0.4648, "step": 8867 }, { "epoch": 1.9781396386348429, "grad_norm": 0.17587338387966156, "learning_rate": 5.272093501103213e-06, "loss": 0.4451, "step": 8868 }, { "epoch": 1.9783627035467322, "grad_norm": 0.17703662812709808, "learning_rate": 5.270019849574513e-06, "loss": 0.4693, "step": 8869 }, { "epoch": 1.9785857684586214, "grad_norm": 0.2079380750656128, "learning_rate": 5.26794646002187e-06, "loss": 0.47, "step": 8870 }, { "epoch": 1.9788088333705107, "grad_norm": 0.21435651183128357, "learning_rate": 5.265873332560114e-06, "loss": 0.4404, "step": 8871 }, { "epoch": 1.9790318982824002, "grad_norm": 0.1856585443019867, "learning_rate": 5.263800467304072e-06, "loss": 0.4442, "step": 8872 }, { "epoch": 1.9792549631942895, "grad_norm": 0.178078293800354, "learning_rate": 5.261727864368554e-06, "loss": 0.4459, "step": 8873 }, { "epoch": 1.979478028106179, "grad_norm": 0.17547845840454102, "learning_rate": 5.259655523868352e-06, "loss": 0.4636, "step": 8874 }, { "epoch": 1.9797010930180683, "grad_norm": 0.17804180085659027, "learning_rate": 5.257583445918244e-06, "loss": 0.4311, "step": 8875 }, { "epoch": 1.9799241579299576, "grad_norm": 0.180350661277771, "learning_rate": 5.2555116306329945e-06, "loss": 0.4316, "step": 8876 }, { "epoch": 1.9801472228418469, "grad_norm": 0.17848455905914307, "learning_rate": 5.253440078127355e-06, "loss": 0.4671, "step": 8877 }, { "epoch": 1.9803702877537364, "grad_norm": 0.1925760954618454, "learning_rate": 5.251368788516066e-06, "loss": 0.462, "step": 8878 }, { "epoch": 1.9805933526656259, "grad_norm": 0.19506682455539703, "learning_rate": 5.249297761913839e-06, "loss": 0.4718, "step": 8879 }, { "epoch": 1.9808164175775151, "grad_norm": 0.18116964399814606, "learning_rate": 5.2472269984353845e-06, "loss": 0.4597, "step": 8880 }, { "epoch": 1.9810394824894044, "grad_norm": 0.18594235181808472, "learning_rate": 5.245156498195395e-06, "loss": 0.453, "step": 8881 }, { "epoch": 1.9812625474012937, "grad_norm": 0.1806681901216507, "learning_rate": 5.243086261308548e-06, "loss": 0.4442, "step": 8882 }, { "epoch": 1.981485612313183, "grad_norm": 0.18260101974010468, "learning_rate": 5.241016287889511e-06, "loss": 0.4457, "step": 8883 }, { "epoch": 1.9817086772250725, "grad_norm": 0.17734308540821075, "learning_rate": 5.238946578052921e-06, "loss": 0.449, "step": 8884 }, { "epoch": 1.981931742136962, "grad_norm": 0.1897689253091812, "learning_rate": 5.236877131913421e-06, "loss": 0.4611, "step": 8885 }, { "epoch": 1.9821548070488513, "grad_norm": 0.1725919246673584, "learning_rate": 5.234807949585623e-06, "loss": 0.4231, "step": 8886 }, { "epoch": 1.9823778719607406, "grad_norm": 0.18596228957176208, "learning_rate": 5.2327390311841404e-06, "loss": 0.45, "step": 8887 }, { "epoch": 1.9826009368726298, "grad_norm": 0.18885090947151184, "learning_rate": 5.230670376823555e-06, "loss": 0.4753, "step": 8888 }, { "epoch": 1.9828240017845193, "grad_norm": 0.18967807292938232, "learning_rate": 5.228601986618442e-06, "loss": 0.449, "step": 8889 }, { "epoch": 1.9830470666964086, "grad_norm": 0.18121550977230072, "learning_rate": 5.226533860683366e-06, "loss": 0.4322, "step": 8890 }, { "epoch": 1.9832701316082981, "grad_norm": 0.181602343916893, "learning_rate": 5.224465999132875e-06, "loss": 0.4414, "step": 8891 }, { "epoch": 1.9834931965201874, "grad_norm": 0.17742736637592316, "learning_rate": 5.222398402081492e-06, "loss": 0.4493, "step": 8892 }, { "epoch": 1.9837162614320767, "grad_norm": 0.17834076285362244, "learning_rate": 5.220331069643737e-06, "loss": 0.4529, "step": 8893 }, { "epoch": 1.983939326343966, "grad_norm": 0.17975963652133942, "learning_rate": 5.218264001934114e-06, "loss": 0.4164, "step": 8894 }, { "epoch": 1.9841623912558555, "grad_norm": 0.18679198622703552, "learning_rate": 5.216197199067112e-06, "loss": 0.431, "step": 8895 }, { "epoch": 1.984385456167745, "grad_norm": 0.1757335662841797, "learning_rate": 5.2141306611571965e-06, "loss": 0.4427, "step": 8896 }, { "epoch": 1.9846085210796343, "grad_norm": 0.19077639281749725, "learning_rate": 5.21206438831883e-06, "loss": 0.4358, "step": 8897 }, { "epoch": 1.9848315859915235, "grad_norm": 0.18716174364089966, "learning_rate": 5.2099983806664546e-06, "loss": 0.4621, "step": 8898 }, { "epoch": 1.9850546509034128, "grad_norm": 0.1881352961063385, "learning_rate": 5.207932638314502e-06, "loss": 0.461, "step": 8899 }, { "epoch": 1.985277715815302, "grad_norm": 0.18843919038772583, "learning_rate": 5.2058671613773805e-06, "loss": 0.4489, "step": 8900 }, { "epoch": 1.9855007807271916, "grad_norm": 0.18741180002689362, "learning_rate": 5.203801949969491e-06, "loss": 0.4648, "step": 8901 }, { "epoch": 1.985723845639081, "grad_norm": 0.17685861885547638, "learning_rate": 5.2017370042052205e-06, "loss": 0.4629, "step": 8902 }, { "epoch": 1.9859469105509704, "grad_norm": 0.16930168867111206, "learning_rate": 5.199672324198935e-06, "loss": 0.4395, "step": 8903 }, { "epoch": 1.9861699754628597, "grad_norm": 0.17272312939167023, "learning_rate": 5.197607910064997e-06, "loss": 0.4509, "step": 8904 }, { "epoch": 1.986393040374749, "grad_norm": 0.18139375746250153, "learning_rate": 5.195543761917736e-06, "loss": 0.4504, "step": 8905 }, { "epoch": 1.9866161052866385, "grad_norm": 0.18100585043430328, "learning_rate": 5.193479879871483e-06, "loss": 0.4445, "step": 8906 }, { "epoch": 1.9868391701985277, "grad_norm": 0.2222553938627243, "learning_rate": 5.19141626404055e-06, "loss": 0.4387, "step": 8907 }, { "epoch": 1.9870622351104172, "grad_norm": 0.17244549095630646, "learning_rate": 5.189352914539233e-06, "loss": 0.4727, "step": 8908 }, { "epoch": 1.9872853000223065, "grad_norm": 0.1766839474439621, "learning_rate": 5.18728983148181e-06, "loss": 0.4521, "step": 8909 }, { "epoch": 1.9875083649341958, "grad_norm": 0.1754491627216339, "learning_rate": 5.185227014982548e-06, "loss": 0.4548, "step": 8910 }, { "epoch": 1.987731429846085, "grad_norm": 0.17780523002147675, "learning_rate": 5.183164465155699e-06, "loss": 0.4563, "step": 8911 }, { "epoch": 1.9879544947579746, "grad_norm": 0.1709602177143097, "learning_rate": 5.181102182115507e-06, "loss": 0.4194, "step": 8912 }, { "epoch": 1.988177559669864, "grad_norm": 0.1960027664899826, "learning_rate": 5.179040165976183e-06, "loss": 0.4324, "step": 8913 }, { "epoch": 1.9884006245817534, "grad_norm": 0.18167687952518463, "learning_rate": 5.176978416851941e-06, "loss": 0.4349, "step": 8914 }, { "epoch": 1.9886236894936427, "grad_norm": 0.1885111927986145, "learning_rate": 5.17491693485697e-06, "loss": 0.4979, "step": 8915 }, { "epoch": 1.988846754405532, "grad_norm": 0.18724943697452545, "learning_rate": 5.172855720105456e-06, "loss": 0.4774, "step": 8916 }, { "epoch": 1.9890698193174212, "grad_norm": 0.1897243857383728, "learning_rate": 5.1707947727115515e-06, "loss": 0.4415, "step": 8917 }, { "epoch": 1.9892928842293107, "grad_norm": 0.18215550482273102, "learning_rate": 5.16873409278941e-06, "loss": 0.4539, "step": 8918 }, { "epoch": 1.9895159491412002, "grad_norm": 0.17774201929569244, "learning_rate": 5.1666736804531646e-06, "loss": 0.4199, "step": 8919 }, { "epoch": 1.9897390140530895, "grad_norm": 0.17743372917175293, "learning_rate": 5.164613535816937e-06, "loss": 0.4457, "step": 8920 }, { "epoch": 1.9899620789649788, "grad_norm": 0.17990683019161224, "learning_rate": 5.162553658994823e-06, "loss": 0.4648, "step": 8921 }, { "epoch": 1.990185143876868, "grad_norm": 0.18305742740631104, "learning_rate": 5.160494050100917e-06, "loss": 0.4792, "step": 8922 }, { "epoch": 1.9904082087887576, "grad_norm": 0.18619424104690552, "learning_rate": 5.158434709249291e-06, "loss": 0.425, "step": 8923 }, { "epoch": 1.9906312737006469, "grad_norm": 0.17404185235500336, "learning_rate": 5.156375636554007e-06, "loss": 0.4299, "step": 8924 }, { "epoch": 1.9908543386125364, "grad_norm": 0.17407676577568054, "learning_rate": 5.1543168321291115e-06, "loss": 0.4386, "step": 8925 }, { "epoch": 1.9910774035244256, "grad_norm": 0.17693208158016205, "learning_rate": 5.152258296088626e-06, "loss": 0.4306, "step": 8926 }, { "epoch": 1.991300468436315, "grad_norm": 0.1785212606191635, "learning_rate": 5.15020002854657e-06, "loss": 0.4715, "step": 8927 }, { "epoch": 1.9915235333482042, "grad_norm": 0.18704737722873688, "learning_rate": 5.148142029616943e-06, "loss": 0.4653, "step": 8928 }, { "epoch": 1.9917465982600937, "grad_norm": 0.19586940109729767, "learning_rate": 5.146084299413732e-06, "loss": 0.4784, "step": 8929 }, { "epoch": 1.9919696631719832, "grad_norm": 0.18988698720932007, "learning_rate": 5.144026838050902e-06, "loss": 0.4543, "step": 8930 }, { "epoch": 1.9921927280838725, "grad_norm": 0.17881841957569122, "learning_rate": 5.141969645642412e-06, "loss": 0.4286, "step": 8931 }, { "epoch": 1.9924157929957618, "grad_norm": 0.18214870989322662, "learning_rate": 5.139912722302201e-06, "loss": 0.435, "step": 8932 }, { "epoch": 1.992638857907651, "grad_norm": 0.26209428906440735, "learning_rate": 5.137856068144197e-06, "loss": 0.4628, "step": 8933 }, { "epoch": 1.9928619228195403, "grad_norm": 0.17895345389842987, "learning_rate": 5.135799683282309e-06, "loss": 0.4708, "step": 8934 }, { "epoch": 1.9930849877314298, "grad_norm": 0.18702343106269836, "learning_rate": 5.133743567830427e-06, "loss": 0.4324, "step": 8935 }, { "epoch": 1.9933080526433193, "grad_norm": 0.18158088624477386, "learning_rate": 5.1316877219024375e-06, "loss": 0.4554, "step": 8936 }, { "epoch": 1.9935311175552086, "grad_norm": 0.1866513192653656, "learning_rate": 5.129632145612204e-06, "loss": 0.4397, "step": 8937 }, { "epoch": 1.993754182467098, "grad_norm": 0.19149568676948547, "learning_rate": 5.127576839073583e-06, "loss": 0.432, "step": 8938 }, { "epoch": 1.9939772473789872, "grad_norm": 0.18493977189064026, "learning_rate": 5.1255218024004e-06, "loss": 0.4445, "step": 8939 }, { "epoch": 1.9942003122908767, "grad_norm": 0.18081733584403992, "learning_rate": 5.123467035706482e-06, "loss": 0.4412, "step": 8940 }, { "epoch": 1.994423377202766, "grad_norm": 0.17544502019882202, "learning_rate": 5.121412539105635e-06, "loss": 0.4076, "step": 8941 }, { "epoch": 1.9946464421146555, "grad_norm": 0.20097681879997253, "learning_rate": 5.119358312711651e-06, "loss": 0.4676, "step": 8942 }, { "epoch": 1.9948695070265448, "grad_norm": 0.17874199151992798, "learning_rate": 5.117304356638301e-06, "loss": 0.4525, "step": 8943 }, { "epoch": 1.995092571938434, "grad_norm": 0.18695956468582153, "learning_rate": 5.11525067099935e-06, "loss": 0.4498, "step": 8944 }, { "epoch": 1.9953156368503233, "grad_norm": 0.18029382824897766, "learning_rate": 5.113197255908543e-06, "loss": 0.454, "step": 8945 }, { "epoch": 1.9955387017622128, "grad_norm": 0.17514650523662567, "learning_rate": 5.111144111479611e-06, "loss": 0.4191, "step": 8946 }, { "epoch": 1.9957617666741023, "grad_norm": 0.17547555267810822, "learning_rate": 5.109091237826273e-06, "loss": 0.4276, "step": 8947 }, { "epoch": 1.9959848315859916, "grad_norm": 0.18239977955818176, "learning_rate": 5.107038635062225e-06, "loss": 0.4447, "step": 8948 }, { "epoch": 1.9962078964978809, "grad_norm": 0.1756354421377182, "learning_rate": 5.1049863033011535e-06, "loss": 0.4251, "step": 8949 }, { "epoch": 1.9964309614097702, "grad_norm": 0.18526077270507812, "learning_rate": 5.1029342426567345e-06, "loss": 0.4603, "step": 8950 }, { "epoch": 1.9966540263216594, "grad_norm": 0.20123088359832764, "learning_rate": 5.100882453242622e-06, "loss": 0.4473, "step": 8951 }, { "epoch": 1.996877091233549, "grad_norm": 0.17634332180023193, "learning_rate": 5.098830935172453e-06, "loss": 0.4318, "step": 8952 }, { "epoch": 1.9971001561454385, "grad_norm": 0.19645899534225464, "learning_rate": 5.096779688559857e-06, "loss": 0.4572, "step": 8953 }, { "epoch": 1.9973232210573277, "grad_norm": 0.17646844685077667, "learning_rate": 5.094728713518442e-06, "loss": 0.4428, "step": 8954 }, { "epoch": 1.997546285969217, "grad_norm": 0.17658363282680511, "learning_rate": 5.092678010161812e-06, "loss": 0.4518, "step": 8955 }, { "epoch": 1.9977693508811063, "grad_norm": 0.17553554475307465, "learning_rate": 5.090627578603537e-06, "loss": 0.4602, "step": 8956 }, { "epoch": 1.9979924157929958, "grad_norm": 0.18948344886302948, "learning_rate": 5.08857741895719e-06, "loss": 0.4539, "step": 8957 }, { "epoch": 1.998215480704885, "grad_norm": 0.17445118725299835, "learning_rate": 5.086527531336318e-06, "loss": 0.4532, "step": 8958 }, { "epoch": 1.9984385456167746, "grad_norm": 0.1809672862291336, "learning_rate": 5.084477915854462e-06, "loss": 0.4203, "step": 8959 }, { "epoch": 1.9986616105286639, "grad_norm": 0.17913390696048737, "learning_rate": 5.082428572625136e-06, "loss": 0.4322, "step": 8960 }, { "epoch": 1.9988846754405531, "grad_norm": 0.18459449708461761, "learning_rate": 5.080379501761848e-06, "loss": 0.4865, "step": 8961 }, { "epoch": 1.9991077403524424, "grad_norm": 0.18868421018123627, "learning_rate": 5.0783307033780895e-06, "loss": 0.4571, "step": 8962 }, { "epoch": 1.999330805264332, "grad_norm": 0.17596866190433502, "learning_rate": 5.076282177587339e-06, "loss": 0.4382, "step": 8963 }, { "epoch": 1.9995538701762214, "grad_norm": 0.174740731716156, "learning_rate": 5.074233924503047e-06, "loss": 0.4407, "step": 8964 }, { "epoch": 1.9997769350881107, "grad_norm": 0.17470811307430267, "learning_rate": 5.072185944238665e-06, "loss": 0.4456, "step": 8965 }, { "epoch": 2.0, "grad_norm": 0.18581973016262054, "learning_rate": 5.070138236907625e-06, "loss": 0.4513, "step": 8966 }, { "epoch": 2.0, "eval_loss": 0.31910037994384766, "eval_runtime": 667.801, "eval_samples_per_second": 94.504, "eval_steps_per_second": 1.478, "step": 8966 } ], "logging_steps": 1, "max_steps": 13449, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.876226391488531e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }