| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 945, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0010582010582010583, | |
| "grad_norm": 762.5523071289062, | |
| "learning_rate": 5e-05, | |
| "loss": 58.7334, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0021164021164021165, | |
| "grad_norm": 82.28478240966797, | |
| "learning_rate": 4.9947089947089946e-05, | |
| "loss": 49.8621, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0031746031746031746, | |
| "grad_norm": 240.16102600097656, | |
| "learning_rate": 4.9894179894179895e-05, | |
| "loss": 44.4804, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.004232804232804233, | |
| "grad_norm": 224.8407745361328, | |
| "learning_rate": 4.9841269841269845e-05, | |
| "loss": 43.2508, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.005291005291005291, | |
| "grad_norm": 64.33789825439453, | |
| "learning_rate": 4.978835978835979e-05, | |
| "loss": 40.2405, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.006349206349206349, | |
| "grad_norm": 118.09500885009766, | |
| "learning_rate": 4.973544973544973e-05, | |
| "loss": 35.3972, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.007407407407407408, | |
| "grad_norm": 111.19491577148438, | |
| "learning_rate": 4.968253968253969e-05, | |
| "loss": 33.5484, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.008465608465608466, | |
| "grad_norm": 157.63052368164062, | |
| "learning_rate": 4.962962962962963e-05, | |
| "loss": 29.6581, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.009523809523809525, | |
| "grad_norm": 227.2969970703125, | |
| "learning_rate": 4.9576719576719575e-05, | |
| "loss": 22.3382, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.010582010582010581, | |
| "grad_norm": 169.19720458984375, | |
| "learning_rate": 4.9523809523809525e-05, | |
| "loss": 14.1501, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01164021164021164, | |
| "grad_norm": 80.01154327392578, | |
| "learning_rate": 4.9470899470899475e-05, | |
| "loss": 10.3732, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.012698412698412698, | |
| "grad_norm": 74.98716735839844, | |
| "learning_rate": 4.941798941798942e-05, | |
| "loss": 10.4696, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.013756613756613757, | |
| "grad_norm": 84.27120971679688, | |
| "learning_rate": 4.936507936507937e-05, | |
| "loss": 9.5193, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.014814814814814815, | |
| "grad_norm": 70.76972198486328, | |
| "learning_rate": 4.931216931216932e-05, | |
| "loss": 8.4595, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.015873015873015872, | |
| "grad_norm": 46.97925567626953, | |
| "learning_rate": 4.925925925925926e-05, | |
| "loss": 7.5167, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.016931216931216932, | |
| "grad_norm": 22.022876739501953, | |
| "learning_rate": 4.9206349206349204e-05, | |
| "loss": 7.1356, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.01798941798941799, | |
| "grad_norm": 23.84092903137207, | |
| "learning_rate": 4.9153439153439154e-05, | |
| "loss": 7.0376, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.01904761904761905, | |
| "grad_norm": 21.349409103393555, | |
| "learning_rate": 4.9100529100529104e-05, | |
| "loss": 7.0128, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.020105820105820106, | |
| "grad_norm": 12.040107727050781, | |
| "learning_rate": 4.904761904761905e-05, | |
| "loss": 6.4316, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.021164021164021163, | |
| "grad_norm": 12.285114288330078, | |
| "learning_rate": 4.8994708994709e-05, | |
| "loss": 6.4124, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": 17.068172454833984, | |
| "learning_rate": 4.894179894179895e-05, | |
| "loss": 6.2872, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.02328042328042328, | |
| "grad_norm": 15.330672264099121, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 6.1388, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.02433862433862434, | |
| "grad_norm": 5.708643436431885, | |
| "learning_rate": 4.883597883597884e-05, | |
| "loss": 5.9494, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.025396825396825397, | |
| "grad_norm": 11.539711952209473, | |
| "learning_rate": 4.878306878306878e-05, | |
| "loss": 5.9674, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.026455026455026454, | |
| "grad_norm": 10.946340560913086, | |
| "learning_rate": 4.873015873015873e-05, | |
| "loss": 5.8943, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.027513227513227514, | |
| "grad_norm": 7.171319484710693, | |
| "learning_rate": 4.8677248677248676e-05, | |
| "loss": 5.7903, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.02857142857142857, | |
| "grad_norm": 6.202214241027832, | |
| "learning_rate": 4.8624338624338626e-05, | |
| "loss": 5.8376, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.02962962962962963, | |
| "grad_norm": 11.297914505004883, | |
| "learning_rate": 4.8571428571428576e-05, | |
| "loss": 5.492, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.030687830687830688, | |
| "grad_norm": 8.590489387512207, | |
| "learning_rate": 4.851851851851852e-05, | |
| "loss": 5.9732, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.031746031746031744, | |
| "grad_norm": 4.967828750610352, | |
| "learning_rate": 4.846560846560847e-05, | |
| "loss": 5.8429, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0328042328042328, | |
| "grad_norm": 7.755899429321289, | |
| "learning_rate": 4.841269841269841e-05, | |
| "loss": 5.3487, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.033862433862433865, | |
| "grad_norm": 5.126328468322754, | |
| "learning_rate": 4.835978835978836e-05, | |
| "loss": 5.3793, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.03492063492063492, | |
| "grad_norm": 7.324168682098389, | |
| "learning_rate": 4.830687830687831e-05, | |
| "loss": 6.0176, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.03597883597883598, | |
| "grad_norm": 5.837782382965088, | |
| "learning_rate": 4.8253968253968255e-05, | |
| "loss": 5.531, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.037037037037037035, | |
| "grad_norm": 7.6066460609436035, | |
| "learning_rate": 4.8201058201058205e-05, | |
| "loss": 5.617, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0380952380952381, | |
| "grad_norm": 82.80633544921875, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 5.664, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.039153439153439155, | |
| "grad_norm": 12.928624153137207, | |
| "learning_rate": 4.80952380952381e-05, | |
| "loss": 5.2884, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.04021164021164021, | |
| "grad_norm": 8.23830795288086, | |
| "learning_rate": 4.804232804232804e-05, | |
| "loss": 5.4968, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.04126984126984127, | |
| "grad_norm": 8.692220687866211, | |
| "learning_rate": 4.798941798941799e-05, | |
| "loss": 5.3659, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.042328042328042326, | |
| "grad_norm": 6.263346195220947, | |
| "learning_rate": 4.793650793650794e-05, | |
| "loss": 5.2251, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04338624338624339, | |
| "grad_norm": 6.244935035705566, | |
| "learning_rate": 4.7883597883597884e-05, | |
| "loss": 5.243, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": 6.076992034912109, | |
| "learning_rate": 4.7830687830687834e-05, | |
| "loss": 5.4107, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.0455026455026455, | |
| "grad_norm": 5.009204387664795, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 5.4213, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.04656084656084656, | |
| "grad_norm": 3.8069138526916504, | |
| "learning_rate": 4.772486772486773e-05, | |
| "loss": 5.2413, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.047619047619047616, | |
| "grad_norm": 5.226925373077393, | |
| "learning_rate": 4.767195767195767e-05, | |
| "loss": 5.3811, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.04867724867724868, | |
| "grad_norm": 5.156801223754883, | |
| "learning_rate": 4.761904761904762e-05, | |
| "loss": 5.5233, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.04973544973544974, | |
| "grad_norm": 4.208065509796143, | |
| "learning_rate": 4.756613756613757e-05, | |
| "loss": 5.1164, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.050793650793650794, | |
| "grad_norm": 4.350522994995117, | |
| "learning_rate": 4.7513227513227513e-05, | |
| "loss": 5.2106, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.05185185185185185, | |
| "grad_norm": 6.086979389190674, | |
| "learning_rate": 4.746031746031746e-05, | |
| "loss": 5.2525, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.05291005291005291, | |
| "grad_norm": 3.6869099140167236, | |
| "learning_rate": 4.740740740740741e-05, | |
| "loss": 5.1225, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05396825396825397, | |
| "grad_norm": 36.87986755371094, | |
| "learning_rate": 4.7354497354497356e-05, | |
| "loss": 6.1878, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.05502645502645503, | |
| "grad_norm": 4.030213832855225, | |
| "learning_rate": 4.73015873015873e-05, | |
| "loss": 5.2523, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.056084656084656084, | |
| "grad_norm": 6.104913711547852, | |
| "learning_rate": 4.7248677248677256e-05, | |
| "loss": 5.1113, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 3.9061765670776367, | |
| "learning_rate": 4.71957671957672e-05, | |
| "loss": 5.3375, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.0582010582010582, | |
| "grad_norm": 4.80383825302124, | |
| "learning_rate": 4.714285714285714e-05, | |
| "loss": 4.9556, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.05925925925925926, | |
| "grad_norm": 19.7348690032959, | |
| "learning_rate": 4.708994708994709e-05, | |
| "loss": 5.4731, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.06031746031746032, | |
| "grad_norm": 5.619427680969238, | |
| "learning_rate": 4.703703703703704e-05, | |
| "loss": 5.2191, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.061375661375661375, | |
| "grad_norm": 4.279834747314453, | |
| "learning_rate": 4.6984126984126986e-05, | |
| "loss": 5.0808, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.06243386243386243, | |
| "grad_norm": 5.503246307373047, | |
| "learning_rate": 4.693121693121693e-05, | |
| "loss": 4.8678, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.06349206349206349, | |
| "grad_norm": 5.174645900726318, | |
| "learning_rate": 4.6878306878306885e-05, | |
| "loss": 4.8517, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06455026455026455, | |
| "grad_norm": 4.007596492767334, | |
| "learning_rate": 4.682539682539683e-05, | |
| "loss": 5.1492, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.0656084656084656, | |
| "grad_norm": 6.642776012420654, | |
| "learning_rate": 4.677248677248677e-05, | |
| "loss": 5.2119, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": 6.208924293518066, | |
| "learning_rate": 4.671957671957672e-05, | |
| "loss": 5.105, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.06772486772486773, | |
| "grad_norm": 5.9450531005859375, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 4.9118, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.06878306878306878, | |
| "grad_norm": 3.375946521759033, | |
| "learning_rate": 4.6613756613756615e-05, | |
| "loss": 4.9944, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.06984126984126984, | |
| "grad_norm": 4.091769218444824, | |
| "learning_rate": 4.656084656084656e-05, | |
| "loss": 5.0409, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.07089947089947089, | |
| "grad_norm": 6.6762542724609375, | |
| "learning_rate": 4.6507936507936515e-05, | |
| "loss": 5.022, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.07195767195767196, | |
| "grad_norm": 5.9758524894714355, | |
| "learning_rate": 4.645502645502646e-05, | |
| "loss": 4.8099, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.07301587301587302, | |
| "grad_norm": 4.996827602386475, | |
| "learning_rate": 4.64021164021164e-05, | |
| "loss": 4.917, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.07407407407407407, | |
| "grad_norm": 5.705566883087158, | |
| "learning_rate": 4.634920634920635e-05, | |
| "loss": 4.8434, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.07513227513227513, | |
| "grad_norm": 11.020337104797363, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 5.4662, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.0761904761904762, | |
| "grad_norm": 6.739878177642822, | |
| "learning_rate": 4.6243386243386244e-05, | |
| "loss": 4.5948, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.07724867724867725, | |
| "grad_norm": 6.3748674392700195, | |
| "learning_rate": 4.6190476190476194e-05, | |
| "loss": 4.8899, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.07830687830687831, | |
| "grad_norm": 4.912506103515625, | |
| "learning_rate": 4.6137566137566144e-05, | |
| "loss": 4.5843, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.07936507936507936, | |
| "grad_norm": 5.216796875, | |
| "learning_rate": 4.608465608465609e-05, | |
| "loss": 4.926, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.08042328042328042, | |
| "grad_norm": 6.009359359741211, | |
| "learning_rate": 4.603174603174603e-05, | |
| "loss": 5.1626, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.08148148148148149, | |
| "grad_norm": 6.634153366088867, | |
| "learning_rate": 4.597883597883598e-05, | |
| "loss": 5.0618, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.08253968253968254, | |
| "grad_norm": 5.179196834564209, | |
| "learning_rate": 4.592592592592593e-05, | |
| "loss": 5.137, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.0835978835978836, | |
| "grad_norm": 4.354217529296875, | |
| "learning_rate": 4.587301587301587e-05, | |
| "loss": 4.8585, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.08465608465608465, | |
| "grad_norm": 5.238408088684082, | |
| "learning_rate": 4.582010582010582e-05, | |
| "loss": 4.883, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.08571428571428572, | |
| "grad_norm": 6.286460876464844, | |
| "learning_rate": 4.576719576719577e-05, | |
| "loss": 4.7807, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.08677248677248678, | |
| "grad_norm": 4.681844234466553, | |
| "learning_rate": 4.5714285714285716e-05, | |
| "loss": 4.9966, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.08783068783068783, | |
| "grad_norm": 4.610260963439941, | |
| "learning_rate": 4.5661375661375666e-05, | |
| "loss": 4.785, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 3.8423781394958496, | |
| "learning_rate": 4.560846560846561e-05, | |
| "loss": 5.0132, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.08994708994708994, | |
| "grad_norm": 5.1862921714782715, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 4.8032, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.091005291005291, | |
| "grad_norm": 3.509219169616699, | |
| "learning_rate": 4.55026455026455e-05, | |
| "loss": 4.8898, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.09206349206349207, | |
| "grad_norm": 5.162282466888428, | |
| "learning_rate": 4.544973544973545e-05, | |
| "loss": 5.0933, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.09312169312169312, | |
| "grad_norm": 8.435232162475586, | |
| "learning_rate": 4.5396825396825395e-05, | |
| "loss": 4.8731, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.09417989417989418, | |
| "grad_norm": 5.817312717437744, | |
| "learning_rate": 4.5343915343915345e-05, | |
| "loss": 5.1801, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "grad_norm": 4.302613735198975, | |
| "learning_rate": 4.5291005291005295e-05, | |
| "loss": 4.748, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0962962962962963, | |
| "grad_norm": 3.806368350982666, | |
| "learning_rate": 4.523809523809524e-05, | |
| "loss": 5.0319, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.09735449735449736, | |
| "grad_norm": 5.066342830657959, | |
| "learning_rate": 4.518518518518519e-05, | |
| "loss": 4.6202, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.09841269841269841, | |
| "grad_norm": 6.008955478668213, | |
| "learning_rate": 4.513227513227514e-05, | |
| "loss": 5.0549, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.09947089947089947, | |
| "grad_norm": 4.634308338165283, | |
| "learning_rate": 4.507936507936508e-05, | |
| "loss": 4.825, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.10052910052910052, | |
| "grad_norm": 6.513881206512451, | |
| "learning_rate": 4.5026455026455024e-05, | |
| "loss": 4.9766, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.10158730158730159, | |
| "grad_norm": 4.2518815994262695, | |
| "learning_rate": 4.4973544973544974e-05, | |
| "loss": 5.0869, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.10264550264550265, | |
| "grad_norm": 3.959833860397339, | |
| "learning_rate": 4.4920634920634924e-05, | |
| "loss": 4.7034, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.1037037037037037, | |
| "grad_norm": 4.720004081726074, | |
| "learning_rate": 4.486772486772487e-05, | |
| "loss": 4.7978, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.10476190476190476, | |
| "grad_norm": 4.846654415130615, | |
| "learning_rate": 4.481481481481482e-05, | |
| "loss": 4.8179, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.10582010582010581, | |
| "grad_norm": 4.8786115646362305, | |
| "learning_rate": 4.476190476190477e-05, | |
| "loss": 4.6529, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10687830687830688, | |
| "grad_norm": 4.344315528869629, | |
| "learning_rate": 4.470899470899471e-05, | |
| "loss": 4.7327, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.10793650793650794, | |
| "grad_norm": 8.0552978515625, | |
| "learning_rate": 4.4656084656084654e-05, | |
| "loss": 4.7912, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.10899470899470899, | |
| "grad_norm": 6.734848976135254, | |
| "learning_rate": 4.460317460317461e-05, | |
| "loss": 4.597, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.11005291005291006, | |
| "grad_norm": 3.832582950592041, | |
| "learning_rate": 4.4550264550264553e-05, | |
| "loss": 4.8456, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": 3.7660605907440186, | |
| "learning_rate": 4.44973544973545e-05, | |
| "loss": 5.0973, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.11216931216931217, | |
| "grad_norm": 5.155391216278076, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 4.8983, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.11322751322751323, | |
| "grad_norm": 5.6872406005859375, | |
| "learning_rate": 4.4391534391534397e-05, | |
| "loss": 4.7054, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 4.285317420959473, | |
| "learning_rate": 4.433862433862434e-05, | |
| "loss": 4.8328, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.11534391534391535, | |
| "grad_norm": 4.081933498382568, | |
| "learning_rate": 4.428571428571428e-05, | |
| "loss": 5.0816, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.1164021164021164, | |
| "grad_norm": 4.103255748748779, | |
| "learning_rate": 4.423280423280424e-05, | |
| "loss": 4.5584, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.11746031746031746, | |
| "grad_norm": 3.3774573802948, | |
| "learning_rate": 4.417989417989418e-05, | |
| "loss": 4.7316, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.11851851851851852, | |
| "grad_norm": 4.408664226531982, | |
| "learning_rate": 4.4126984126984126e-05, | |
| "loss": 4.9052, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.11957671957671957, | |
| "grad_norm": 4.274510383605957, | |
| "learning_rate": 4.4074074074074076e-05, | |
| "loss": 4.7359, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.12063492063492064, | |
| "grad_norm": 4.7696309089660645, | |
| "learning_rate": 4.4021164021164026e-05, | |
| "loss": 4.7077, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.12169312169312169, | |
| "grad_norm": 5.227766990661621, | |
| "learning_rate": 4.396825396825397e-05, | |
| "loss": 4.7518, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.12275132275132275, | |
| "grad_norm": 4.35728645324707, | |
| "learning_rate": 4.391534391534391e-05, | |
| "loss": 4.7092, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.12380952380952381, | |
| "grad_norm": 6.191098690032959, | |
| "learning_rate": 4.386243386243387e-05, | |
| "loss": 4.6266, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.12486772486772486, | |
| "grad_norm": 9.239175796508789, | |
| "learning_rate": 4.380952380952381e-05, | |
| "loss": 5.0851, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.1259259259259259, | |
| "grad_norm": 3.643599510192871, | |
| "learning_rate": 4.3756613756613755e-05, | |
| "loss": 4.5317, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.12698412698412698, | |
| "grad_norm": 4.724228382110596, | |
| "learning_rate": 4.3703703703703705e-05, | |
| "loss": 4.9586, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.12804232804232804, | |
| "grad_norm": 8.117734909057617, | |
| "learning_rate": 4.3650793650793655e-05, | |
| "loss": 4.922, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.1291005291005291, | |
| "grad_norm": 5.805531978607178, | |
| "learning_rate": 4.35978835978836e-05, | |
| "loss": 4.6248, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.13015873015873017, | |
| "grad_norm": 4.43373441696167, | |
| "learning_rate": 4.354497354497355e-05, | |
| "loss": 4.7716, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.1312169312169312, | |
| "grad_norm": 3.115410327911377, | |
| "learning_rate": 4.34920634920635e-05, | |
| "loss": 4.8769, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.13227513227513227, | |
| "grad_norm": 3.7560129165649414, | |
| "learning_rate": 4.343915343915344e-05, | |
| "loss": 4.7546, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 3.7244677543640137, | |
| "learning_rate": 4.3386243386243384e-05, | |
| "loss": 4.8349, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.1343915343915344, | |
| "grad_norm": 3.5653369426727295, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 4.6841, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.13544973544973546, | |
| "grad_norm": 5.319230556488037, | |
| "learning_rate": 4.3280423280423284e-05, | |
| "loss": 4.8841, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.1365079365079365, | |
| "grad_norm": 3.6799139976501465, | |
| "learning_rate": 4.322751322751323e-05, | |
| "loss": 4.6764, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.13756613756613756, | |
| "grad_norm": 4.678844928741455, | |
| "learning_rate": 4.317460317460318e-05, | |
| "loss": 4.8108, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.13862433862433862, | |
| "grad_norm": 4.520280838012695, | |
| "learning_rate": 4.312169312169313e-05, | |
| "loss": 4.4304, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.13968253968253969, | |
| "grad_norm": 3.9787561893463135, | |
| "learning_rate": 4.306878306878307e-05, | |
| "loss": 4.7079, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.14074074074074075, | |
| "grad_norm": 3.6860711574554443, | |
| "learning_rate": 4.301587301587302e-05, | |
| "loss": 4.4752, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.14179894179894179, | |
| "grad_norm": 5.72208309173584, | |
| "learning_rate": 4.296296296296296e-05, | |
| "loss": 4.6981, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 3.850085735321045, | |
| "learning_rate": 4.291005291005291e-05, | |
| "loss": 4.5628, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.1439153439153439, | |
| "grad_norm": 4.418358325958252, | |
| "learning_rate": 4.2857142857142856e-05, | |
| "loss": 4.6297, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.14497354497354498, | |
| "grad_norm": 5.23398494720459, | |
| "learning_rate": 4.2804232804232806e-05, | |
| "loss": 4.779, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.14603174603174604, | |
| "grad_norm": 5.489072322845459, | |
| "learning_rate": 4.2751322751322756e-05, | |
| "loss": 4.6993, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.14708994708994708, | |
| "grad_norm": 5.287248134613037, | |
| "learning_rate": 4.26984126984127e-05, | |
| "loss": 4.551, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.14814814814814814, | |
| "grad_norm": 5.898606777191162, | |
| "learning_rate": 4.264550264550265e-05, | |
| "loss": 4.8247, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1492063492063492, | |
| "grad_norm": 4.300800800323486, | |
| "learning_rate": 4.259259259259259e-05, | |
| "loss": 4.4861, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.15026455026455027, | |
| "grad_norm": 5.065099716186523, | |
| "learning_rate": 4.253968253968254e-05, | |
| "loss": 4.6557, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.15132275132275133, | |
| "grad_norm": 4.122894287109375, | |
| "learning_rate": 4.248677248677249e-05, | |
| "loss": 4.66, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.1523809523809524, | |
| "grad_norm": 5.119123458862305, | |
| "learning_rate": 4.2433862433862435e-05, | |
| "loss": 4.7191, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.15343915343915343, | |
| "grad_norm": 3.865441083908081, | |
| "learning_rate": 4.2380952380952385e-05, | |
| "loss": 4.5545, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.1544973544973545, | |
| "grad_norm": 5.3772406578063965, | |
| "learning_rate": 4.232804232804233e-05, | |
| "loss": 4.7646, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.15555555555555556, | |
| "grad_norm": 4.440810203552246, | |
| "learning_rate": 4.227513227513228e-05, | |
| "loss": 4.6804, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.15661375661375662, | |
| "grad_norm": 4.11602783203125, | |
| "learning_rate": 4.222222222222222e-05, | |
| "loss": 4.6774, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.15767195767195769, | |
| "grad_norm": 3.9843058586120605, | |
| "learning_rate": 4.216931216931217e-05, | |
| "loss": 4.4568, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.15873015873015872, | |
| "grad_norm": 4.767110347747803, | |
| "learning_rate": 4.211640211640212e-05, | |
| "loss": 4.7618, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.15978835978835979, | |
| "grad_norm": 3.820188522338867, | |
| "learning_rate": 4.2063492063492065e-05, | |
| "loss": 4.7003, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.16084656084656085, | |
| "grad_norm": 5.942455768585205, | |
| "learning_rate": 4.2010582010582014e-05, | |
| "loss": 4.3406, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.1619047619047619, | |
| "grad_norm": 4.969194412231445, | |
| "learning_rate": 4.1957671957671964e-05, | |
| "loss": 4.7058, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.16296296296296298, | |
| "grad_norm": 4.873613357543945, | |
| "learning_rate": 4.190476190476191e-05, | |
| "loss": 4.666, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.164021164021164, | |
| "grad_norm": 3.2713990211486816, | |
| "learning_rate": 4.185185185185185e-05, | |
| "loss": 4.7406, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.16507936507936508, | |
| "grad_norm": 4.688560962677002, | |
| "learning_rate": 4.17989417989418e-05, | |
| "loss": 4.5528, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.16613756613756614, | |
| "grad_norm": 4.477161884307861, | |
| "learning_rate": 4.174603174603175e-05, | |
| "loss": 4.6816, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.1671957671957672, | |
| "grad_norm": 3.5517561435699463, | |
| "learning_rate": 4.1693121693121694e-05, | |
| "loss": 4.615, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.16825396825396827, | |
| "grad_norm": 5.814800262451172, | |
| "learning_rate": 4.1640211640211644e-05, | |
| "loss": 4.2947, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.1693121693121693, | |
| "grad_norm": 4.193921089172363, | |
| "learning_rate": 4.1587301587301594e-05, | |
| "loss": 4.6911, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17037037037037037, | |
| "grad_norm": 4.776106357574463, | |
| "learning_rate": 4.153439153439154e-05, | |
| "loss": 4.5339, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 3.6364221572875977, | |
| "learning_rate": 4.148148148148148e-05, | |
| "loss": 4.8256, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.1724867724867725, | |
| "grad_norm": 4.010839462280273, | |
| "learning_rate": 4.1428571428571437e-05, | |
| "loss": 4.6134, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.17354497354497356, | |
| "grad_norm": 3.5767881870269775, | |
| "learning_rate": 4.137566137566138e-05, | |
| "loss": 4.7187, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.1746031746031746, | |
| "grad_norm": 6.417559623718262, | |
| "learning_rate": 4.132275132275132e-05, | |
| "loss": 4.9141, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.17566137566137566, | |
| "grad_norm": 4.13731050491333, | |
| "learning_rate": 4.126984126984127e-05, | |
| "loss": 4.8624, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.17671957671957672, | |
| "grad_norm": 5.958324432373047, | |
| "learning_rate": 4.121693121693122e-05, | |
| "loss": 4.7571, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 5.988956928253174, | |
| "learning_rate": 4.1164021164021166e-05, | |
| "loss": 4.5736, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.17883597883597885, | |
| "grad_norm": 7.006846904754639, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 4.4878, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.17989417989417988, | |
| "grad_norm": 4.999007225036621, | |
| "learning_rate": 4.105820105820106e-05, | |
| "loss": 4.8561, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.18095238095238095, | |
| "grad_norm": 3.71893572807312, | |
| "learning_rate": 4.100529100529101e-05, | |
| "loss": 4.5709, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.182010582010582, | |
| "grad_norm": 3.3149731159210205, | |
| "learning_rate": 4.095238095238095e-05, | |
| "loss": 4.846, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.18306878306878308, | |
| "grad_norm": 5.529186725616455, | |
| "learning_rate": 4.08994708994709e-05, | |
| "loss": 4.8602, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.18412698412698414, | |
| "grad_norm": 4.0738983154296875, | |
| "learning_rate": 4.084656084656085e-05, | |
| "loss": 4.7176, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.18518518518518517, | |
| "grad_norm": 5.210103988647461, | |
| "learning_rate": 4.0793650793650795e-05, | |
| "loss": 4.4552, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.18624338624338624, | |
| "grad_norm": 7.3695068359375, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 4.6735, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.1873015873015873, | |
| "grad_norm": 6.053244113922119, | |
| "learning_rate": 4.068783068783069e-05, | |
| "loss": 4.7158, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.18835978835978837, | |
| "grad_norm": 8.235685348510742, | |
| "learning_rate": 4.063492063492064e-05, | |
| "loss": 4.676, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.18941798941798943, | |
| "grad_norm": 6.053036689758301, | |
| "learning_rate": 4.058201058201058e-05, | |
| "loss": 4.8923, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "grad_norm": 7.517977237701416, | |
| "learning_rate": 4.052910052910053e-05, | |
| "loss": 4.6929, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19153439153439153, | |
| "grad_norm": 3.8566505908966064, | |
| "learning_rate": 4.047619047619048e-05, | |
| "loss": 4.6957, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.1925925925925926, | |
| "grad_norm": 5.264885425567627, | |
| "learning_rate": 4.0423280423280424e-05, | |
| "loss": 4.7976, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.19365079365079366, | |
| "grad_norm": 5.861869812011719, | |
| "learning_rate": 4.0370370370370374e-05, | |
| "loss": 4.7883, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.19470899470899472, | |
| "grad_norm": 3.94100022315979, | |
| "learning_rate": 4.031746031746032e-05, | |
| "loss": 4.7193, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.19576719576719576, | |
| "grad_norm": 5.635229587554932, | |
| "learning_rate": 4.026455026455027e-05, | |
| "loss": 4.8176, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.19682539682539682, | |
| "grad_norm": 4.604954242706299, | |
| "learning_rate": 4.021164021164021e-05, | |
| "loss": 4.7797, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.19788359788359788, | |
| "grad_norm": 3.7886712551116943, | |
| "learning_rate": 4.015873015873016e-05, | |
| "loss": 4.6682, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.19894179894179895, | |
| "grad_norm": 5.277453422546387, | |
| "learning_rate": 4.010582010582011e-05, | |
| "loss": 4.7632, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 8.224989891052246, | |
| "learning_rate": 4.005291005291005e-05, | |
| "loss": 4.8058, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.20105820105820105, | |
| "grad_norm": 4.904975414276123, | |
| "learning_rate": 4e-05, | |
| "loss": 4.3213, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2021164021164021, | |
| "grad_norm": 5.446446895599365, | |
| "learning_rate": 3.9947089947089946e-05, | |
| "loss": 4.831, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.20317460317460317, | |
| "grad_norm": 4.213114261627197, | |
| "learning_rate": 3.9894179894179896e-05, | |
| "loss": 4.6621, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.20423280423280424, | |
| "grad_norm": 5.95498514175415, | |
| "learning_rate": 3.984126984126984e-05, | |
| "loss": 4.5721, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.2052910052910053, | |
| "grad_norm": 5.409979820251465, | |
| "learning_rate": 3.978835978835979e-05, | |
| "loss": 4.8163, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.20634920634920634, | |
| "grad_norm": 3.3244705200195312, | |
| "learning_rate": 3.973544973544974e-05, | |
| "loss": 4.7915, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.2074074074074074, | |
| "grad_norm": 7.880211353302002, | |
| "learning_rate": 3.968253968253968e-05, | |
| "loss": 4.402, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.20846560846560847, | |
| "grad_norm": 4.982080936431885, | |
| "learning_rate": 3.962962962962963e-05, | |
| "loss": 4.675, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.20952380952380953, | |
| "grad_norm": 3.966547727584839, | |
| "learning_rate": 3.9576719576719576e-05, | |
| "loss": 4.4633, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.2105820105820106, | |
| "grad_norm": 4.820013999938965, | |
| "learning_rate": 3.9523809523809526e-05, | |
| "loss": 4.8036, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.21164021164021163, | |
| "grad_norm": 4.312441825866699, | |
| "learning_rate": 3.9470899470899475e-05, | |
| "loss": 4.3439, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2126984126984127, | |
| "grad_norm": 4.462612628936768, | |
| "learning_rate": 3.941798941798942e-05, | |
| "loss": 4.5635, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.21375661375661376, | |
| "grad_norm": 4.000391960144043, | |
| "learning_rate": 3.936507936507937e-05, | |
| "loss": 4.8163, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.21481481481481482, | |
| "grad_norm": 4.748673439025879, | |
| "learning_rate": 3.931216931216931e-05, | |
| "loss": 4.531, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.21587301587301588, | |
| "grad_norm": 4.901307582855225, | |
| "learning_rate": 3.925925925925926e-05, | |
| "loss": 4.6288, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.21693121693121692, | |
| "grad_norm": 4.037147045135498, | |
| "learning_rate": 3.9206349206349205e-05, | |
| "loss": 4.4996, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.21798941798941798, | |
| "grad_norm": 3.361581802368164, | |
| "learning_rate": 3.9153439153439155e-05, | |
| "loss": 4.6278, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.21904761904761905, | |
| "grad_norm": 4.140801906585693, | |
| "learning_rate": 3.9100529100529105e-05, | |
| "loss": 4.7237, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.2201058201058201, | |
| "grad_norm": 4.984572410583496, | |
| "learning_rate": 3.904761904761905e-05, | |
| "loss": 4.5707, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.22116402116402117, | |
| "grad_norm": 7.3860578536987305, | |
| "learning_rate": 3.8994708994709e-05, | |
| "loss": 4.716, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": 6.090242385864258, | |
| "learning_rate": 3.894179894179895e-05, | |
| "loss": 4.6022, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22328042328042327, | |
| "grad_norm": 5.344094276428223, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 4.7154, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.22433862433862434, | |
| "grad_norm": 3.6889617443084717, | |
| "learning_rate": 3.8835978835978834e-05, | |
| "loss": 4.8087, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.2253968253968254, | |
| "grad_norm": 5.947376728057861, | |
| "learning_rate": 3.8783068783068784e-05, | |
| "loss": 4.7237, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.22645502645502646, | |
| "grad_norm": 4.882441520690918, | |
| "learning_rate": 3.8730158730158734e-05, | |
| "loss": 4.86, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.2275132275132275, | |
| "grad_norm": 4.7508625984191895, | |
| "learning_rate": 3.867724867724868e-05, | |
| "loss": 4.8796, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 4.55201530456543, | |
| "learning_rate": 3.862433862433863e-05, | |
| "loss": 4.2992, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.22962962962962963, | |
| "grad_norm": 4.7580461502075195, | |
| "learning_rate": 3.857142857142858e-05, | |
| "loss": 4.7274, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.2306878306878307, | |
| "grad_norm": 5.989125728607178, | |
| "learning_rate": 3.851851851851852e-05, | |
| "loss": 4.8066, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.23174603174603176, | |
| "grad_norm": 6.322452545166016, | |
| "learning_rate": 3.846560846560846e-05, | |
| "loss": 4.669, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.2328042328042328, | |
| "grad_norm": 4.422494888305664, | |
| "learning_rate": 3.841269841269842e-05, | |
| "loss": 4.5265, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23386243386243386, | |
| "grad_norm": 4.153601169586182, | |
| "learning_rate": 3.835978835978836e-05, | |
| "loss": 4.6917, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.23492063492063492, | |
| "grad_norm": 5.383472919464111, | |
| "learning_rate": 3.8306878306878306e-05, | |
| "loss": 4.4222, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.23597883597883598, | |
| "grad_norm": 6.448025703430176, | |
| "learning_rate": 3.8253968253968256e-05, | |
| "loss": 4.4118, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.23703703703703705, | |
| "grad_norm": 3.9466822147369385, | |
| "learning_rate": 3.8201058201058206e-05, | |
| "loss": 4.6465, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.23809523809523808, | |
| "grad_norm": 5.374645709991455, | |
| "learning_rate": 3.814814814814815e-05, | |
| "loss": 4.7006, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.23915343915343915, | |
| "grad_norm": 4.644713878631592, | |
| "learning_rate": 3.809523809523809e-05, | |
| "loss": 4.4274, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.2402116402116402, | |
| "grad_norm": 5.4732208251953125, | |
| "learning_rate": 3.804232804232805e-05, | |
| "loss": 4.5824, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.24126984126984127, | |
| "grad_norm": 4.1463189125061035, | |
| "learning_rate": 3.798941798941799e-05, | |
| "loss": 4.7644, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.24232804232804234, | |
| "grad_norm": 4.603626251220703, | |
| "learning_rate": 3.7936507936507935e-05, | |
| "loss": 4.4102, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.24338624338624337, | |
| "grad_norm": 4.667168617248535, | |
| "learning_rate": 3.7883597883597885e-05, | |
| "loss": 4.5274, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.24444444444444444, | |
| "grad_norm": 4.260847091674805, | |
| "learning_rate": 3.7830687830687835e-05, | |
| "loss": 5.0719, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.2455026455026455, | |
| "grad_norm": 5.182448863983154, | |
| "learning_rate": 3.777777777777778e-05, | |
| "loss": 4.73, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.24656084656084656, | |
| "grad_norm": 5.655933380126953, | |
| "learning_rate": 3.772486772486772e-05, | |
| "loss": 4.6116, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.24761904761904763, | |
| "grad_norm": 5.294816493988037, | |
| "learning_rate": 3.767195767195768e-05, | |
| "loss": 4.7218, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.24867724867724866, | |
| "grad_norm": 5.408359050750732, | |
| "learning_rate": 3.761904761904762e-05, | |
| "loss": 4.418, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.24973544973544973, | |
| "grad_norm": 4.957231521606445, | |
| "learning_rate": 3.7566137566137564e-05, | |
| "loss": 4.8732, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.2507936507936508, | |
| "grad_norm": 7.336751461029053, | |
| "learning_rate": 3.7513227513227514e-05, | |
| "loss": 4.1392, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.2518518518518518, | |
| "grad_norm": 4.910647869110107, | |
| "learning_rate": 3.7460317460317464e-05, | |
| "loss": 4.6966, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.2529100529100529, | |
| "grad_norm": 5.7639336585998535, | |
| "learning_rate": 3.740740740740741e-05, | |
| "loss": 4.6988, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.25396825396825395, | |
| "grad_norm": 4.529329299926758, | |
| "learning_rate": 3.735449735449736e-05, | |
| "loss": 4.3776, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.25502645502645505, | |
| "grad_norm": 5.188973903656006, | |
| "learning_rate": 3.730158730158731e-05, | |
| "loss": 4.159, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.2560846560846561, | |
| "grad_norm": 3.262760877609253, | |
| "learning_rate": 3.724867724867725e-05, | |
| "loss": 4.6907, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.2571428571428571, | |
| "grad_norm": 4.056471347808838, | |
| "learning_rate": 3.7195767195767194e-05, | |
| "loss": 4.6284, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.2582010582010582, | |
| "grad_norm": 4.501747131347656, | |
| "learning_rate": 3.7142857142857143e-05, | |
| "loss": 4.58, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.25925925925925924, | |
| "grad_norm": 4.611583709716797, | |
| "learning_rate": 3.7089947089947093e-05, | |
| "loss": 4.681, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.26031746031746034, | |
| "grad_norm": 4.029916763305664, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 4.3931, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.2613756613756614, | |
| "grad_norm": 3.9643843173980713, | |
| "learning_rate": 3.6984126984126986e-05, | |
| "loss": 4.6138, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.2624338624338624, | |
| "grad_norm": 4.168216228485107, | |
| "learning_rate": 3.6931216931216936e-05, | |
| "loss": 4.3513, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.2634920634920635, | |
| "grad_norm": 3.83542537689209, | |
| "learning_rate": 3.687830687830688e-05, | |
| "loss": 4.6283, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.26455026455026454, | |
| "grad_norm": 3.7934749126434326, | |
| "learning_rate": 3.682539682539683e-05, | |
| "loss": 4.7399, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2656084656084656, | |
| "grad_norm": 4.369382381439209, | |
| "learning_rate": 3.677248677248677e-05, | |
| "loss": 4.7483, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 6.055778503417969, | |
| "learning_rate": 3.671957671957672e-05, | |
| "loss": 4.45, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.2677248677248677, | |
| "grad_norm": 4.169711589813232, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 4.5306, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.2687830687830688, | |
| "grad_norm": 4.536322593688965, | |
| "learning_rate": 3.6613756613756616e-05, | |
| "loss": 4.3509, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.2698412698412698, | |
| "grad_norm": 6.118555068969727, | |
| "learning_rate": 3.6560846560846566e-05, | |
| "loss": 4.2821, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2708994708994709, | |
| "grad_norm": 5.678388595581055, | |
| "learning_rate": 3.650793650793651e-05, | |
| "loss": 4.5712, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.27195767195767195, | |
| "grad_norm": 3.9464683532714844, | |
| "learning_rate": 3.645502645502646e-05, | |
| "loss": 4.6364, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.273015873015873, | |
| "grad_norm": 4.010932922363281, | |
| "learning_rate": 3.64021164021164e-05, | |
| "loss": 4.4778, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.2740740740740741, | |
| "grad_norm": 3.9546217918395996, | |
| "learning_rate": 3.634920634920635e-05, | |
| "loss": 4.7097, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.2751322751322751, | |
| "grad_norm": 5.1796345710754395, | |
| "learning_rate": 3.62962962962963e-05, | |
| "loss": 4.6363, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2761904761904762, | |
| "grad_norm": 5.096473217010498, | |
| "learning_rate": 3.6243386243386245e-05, | |
| "loss": 4.5581, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.27724867724867724, | |
| "grad_norm": 6.787267208099365, | |
| "learning_rate": 3.619047619047619e-05, | |
| "loss": 4.4793, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.2783068783068783, | |
| "grad_norm": 3.9778239727020264, | |
| "learning_rate": 3.613756613756614e-05, | |
| "loss": 4.5243, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.27936507936507937, | |
| "grad_norm": 3.2374017238616943, | |
| "learning_rate": 3.608465608465609e-05, | |
| "loss": 4.7684, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.2804232804232804, | |
| "grad_norm": 4.345169544219971, | |
| "learning_rate": 3.603174603174603e-05, | |
| "loss": 4.3203, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.2814814814814815, | |
| "grad_norm": 5.155961990356445, | |
| "learning_rate": 3.597883597883598e-05, | |
| "loss": 4.5482, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.28253968253968254, | |
| "grad_norm": 4.2913713455200195, | |
| "learning_rate": 3.592592592592593e-05, | |
| "loss": 4.6832, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.28359788359788357, | |
| "grad_norm": 4.977283477783203, | |
| "learning_rate": 3.5873015873015874e-05, | |
| "loss": 4.3257, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.28465608465608466, | |
| "grad_norm": 4.866610050201416, | |
| "learning_rate": 3.582010582010582e-05, | |
| "loss": 4.5165, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 5.422711372375488, | |
| "learning_rate": 3.5767195767195774e-05, | |
| "loss": 4.6076, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2867724867724868, | |
| "grad_norm": 7.791525363922119, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 4.8686, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.2878306878306878, | |
| "grad_norm": 6.378711700439453, | |
| "learning_rate": 3.566137566137566e-05, | |
| "loss": 4.6424, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.28888888888888886, | |
| "grad_norm": 3.367389678955078, | |
| "learning_rate": 3.560846560846561e-05, | |
| "loss": 4.6617, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.28994708994708995, | |
| "grad_norm": 3.890188694000244, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 4.2508, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.291005291005291, | |
| "grad_norm": 3.5759942531585693, | |
| "learning_rate": 3.55026455026455e-05, | |
| "loss": 4.7628, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.2920634920634921, | |
| "grad_norm": 3.059518337249756, | |
| "learning_rate": 3.5449735449735446e-05, | |
| "loss": 4.5359, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.2931216931216931, | |
| "grad_norm": 4.013446807861328, | |
| "learning_rate": 3.53968253968254e-05, | |
| "loss": 4.5223, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.29417989417989415, | |
| "grad_norm": 5.019964218139648, | |
| "learning_rate": 3.5343915343915346e-05, | |
| "loss": 4.456, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.29523809523809524, | |
| "grad_norm": 3.2760136127471924, | |
| "learning_rate": 3.529100529100529e-05, | |
| "loss": 4.5636, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2962962962962963, | |
| "grad_norm": 5.238942623138428, | |
| "learning_rate": 3.523809523809524e-05, | |
| "loss": 4.5664, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.29735449735449737, | |
| "grad_norm": 5.568114280700684, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 4.5683, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.2984126984126984, | |
| "grad_norm": 3.24711537361145, | |
| "learning_rate": 3.513227513227513e-05, | |
| "loss": 4.5822, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.29947089947089944, | |
| "grad_norm": 4.6277313232421875, | |
| "learning_rate": 3.5079365079365075e-05, | |
| "loss": 4.6366, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.30052910052910053, | |
| "grad_norm": 3.8839879035949707, | |
| "learning_rate": 3.502645502645503e-05, | |
| "loss": 4.5388, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.30158730158730157, | |
| "grad_norm": 7.662173271179199, | |
| "learning_rate": 3.4973544973544975e-05, | |
| "loss": 4.2736, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.30264550264550266, | |
| "grad_norm": 5.089264869689941, | |
| "learning_rate": 3.492063492063492e-05, | |
| "loss": 4.6195, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.3037037037037037, | |
| "grad_norm": 3.990954637527466, | |
| "learning_rate": 3.486772486772487e-05, | |
| "loss": 4.4885, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.3047619047619048, | |
| "grad_norm": 4.801586151123047, | |
| "learning_rate": 3.481481481481482e-05, | |
| "loss": 4.5381, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.3058201058201058, | |
| "grad_norm": 5.5773210525512695, | |
| "learning_rate": 3.476190476190476e-05, | |
| "loss": 4.5179, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.30687830687830686, | |
| "grad_norm": 3.99760103225708, | |
| "learning_rate": 3.470899470899471e-05, | |
| "loss": 4.6765, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.30793650793650795, | |
| "grad_norm": 4.964691638946533, | |
| "learning_rate": 3.465608465608466e-05, | |
| "loss": 4.4914, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.308994708994709, | |
| "grad_norm": 3.3278160095214844, | |
| "learning_rate": 3.4603174603174604e-05, | |
| "loss": 4.4241, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.3100529100529101, | |
| "grad_norm": 6.8910675048828125, | |
| "learning_rate": 3.455026455026455e-05, | |
| "loss": 4.8309, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.3111111111111111, | |
| "grad_norm": 3.6537747383117676, | |
| "learning_rate": 3.44973544973545e-05, | |
| "loss": 4.4105, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.31216931216931215, | |
| "grad_norm": 3.973331928253174, | |
| "learning_rate": 3.444444444444445e-05, | |
| "loss": 4.6153, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.31322751322751324, | |
| "grad_norm": 6.508915424346924, | |
| "learning_rate": 3.439153439153439e-05, | |
| "loss": 4.4718, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.3142857142857143, | |
| "grad_norm": 5.570568561553955, | |
| "learning_rate": 3.433862433862434e-05, | |
| "loss": 4.6205, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.31534391534391537, | |
| "grad_norm": 4.15952205657959, | |
| "learning_rate": 3.428571428571429e-05, | |
| "loss": 4.7089, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.3164021164021164, | |
| "grad_norm": 5.480037212371826, | |
| "learning_rate": 3.4232804232804234e-05, | |
| "loss": 4.7241, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.31746031746031744, | |
| "grad_norm": 4.26448917388916, | |
| "learning_rate": 3.4179894179894184e-05, | |
| "loss": 4.5638, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.31851851851851853, | |
| "grad_norm": 6.337195873260498, | |
| "learning_rate": 3.412698412698413e-05, | |
| "loss": 4.4273, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.31957671957671957, | |
| "grad_norm": 5.988368988037109, | |
| "learning_rate": 3.4074074074074077e-05, | |
| "loss": 4.7023, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.32063492063492066, | |
| "grad_norm": 5.5184245109558105, | |
| "learning_rate": 3.402116402116402e-05, | |
| "loss": 4.5858, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.3216931216931217, | |
| "grad_norm": 5.176582336425781, | |
| "learning_rate": 3.396825396825397e-05, | |
| "loss": 4.4652, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.32275132275132273, | |
| "grad_norm": 4.534494400024414, | |
| "learning_rate": 3.391534391534392e-05, | |
| "loss": 4.537, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.3238095238095238, | |
| "grad_norm": 5.228606224060059, | |
| "learning_rate": 3.386243386243386e-05, | |
| "loss": 4.6445, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.32486772486772486, | |
| "grad_norm": 3.956960678100586, | |
| "learning_rate": 3.380952380952381e-05, | |
| "loss": 4.4717, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.32592592592592595, | |
| "grad_norm": 4.401339530944824, | |
| "learning_rate": 3.3756613756613756e-05, | |
| "loss": 4.3484, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.326984126984127, | |
| "grad_norm": 4.009274482727051, | |
| "learning_rate": 3.3703703703703706e-05, | |
| "loss": 4.4202, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.328042328042328, | |
| "grad_norm": 3.2340402603149414, | |
| "learning_rate": 3.3650793650793656e-05, | |
| "loss": 4.5176, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.3291005291005291, | |
| "grad_norm": 3.553013563156128, | |
| "learning_rate": 3.35978835978836e-05, | |
| "loss": 4.598, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.33015873015873015, | |
| "grad_norm": 6.81786584854126, | |
| "learning_rate": 3.354497354497355e-05, | |
| "loss": 4.2469, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.33121693121693124, | |
| "grad_norm": 4.922097682952881, | |
| "learning_rate": 3.349206349206349e-05, | |
| "loss": 4.6131, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.3322751322751323, | |
| "grad_norm": 4.777652263641357, | |
| "learning_rate": 3.343915343915344e-05, | |
| "loss": 4.7549, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 3.2790966033935547, | |
| "learning_rate": 3.3386243386243385e-05, | |
| "loss": 4.4934, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.3343915343915344, | |
| "grad_norm": 4.401710033416748, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 4.5042, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.33544973544973544, | |
| "grad_norm": 5.545312404632568, | |
| "learning_rate": 3.3280423280423285e-05, | |
| "loss": 4.3179, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.33650793650793653, | |
| "grad_norm": 3.8146395683288574, | |
| "learning_rate": 3.322751322751323e-05, | |
| "loss": 4.5358, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.33756613756613757, | |
| "grad_norm": 4.654644966125488, | |
| "learning_rate": 3.317460317460318e-05, | |
| "loss": 4.5204, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.3386243386243386, | |
| "grad_norm": 4.144741535186768, | |
| "learning_rate": 3.312169312169313e-05, | |
| "loss": 4.93, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.3396825396825397, | |
| "grad_norm": 4.340035915374756, | |
| "learning_rate": 3.306878306878307e-05, | |
| "loss": 4.2467, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.34074074074074073, | |
| "grad_norm": 4.59688663482666, | |
| "learning_rate": 3.3015873015873014e-05, | |
| "loss": 4.3413, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.3417989417989418, | |
| "grad_norm": 3.6761655807495117, | |
| "learning_rate": 3.2962962962962964e-05, | |
| "loss": 4.4834, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 5.661898612976074, | |
| "learning_rate": 3.2910052910052914e-05, | |
| "loss": 4.516, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.3439153439153439, | |
| "grad_norm": 2.8504817485809326, | |
| "learning_rate": 3.285714285714286e-05, | |
| "loss": 4.55, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.344973544973545, | |
| "grad_norm": 3.2584402561187744, | |
| "learning_rate": 3.280423280423281e-05, | |
| "loss": 4.4524, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.346031746031746, | |
| "grad_norm": 3.2479212284088135, | |
| "learning_rate": 3.275132275132276e-05, | |
| "loss": 4.5226, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.3470899470899471, | |
| "grad_norm": 5.987076282501221, | |
| "learning_rate": 3.26984126984127e-05, | |
| "loss": 4.4857, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.34814814814814815, | |
| "grad_norm": 5.704360485076904, | |
| "learning_rate": 3.264550264550264e-05, | |
| "loss": 4.5853, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.3492063492063492, | |
| "grad_norm": 3.879491090774536, | |
| "learning_rate": 3.25925925925926e-05, | |
| "loss": 4.6353, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.3502645502645503, | |
| "grad_norm": 4.888803958892822, | |
| "learning_rate": 3.253968253968254e-05, | |
| "loss": 4.777, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.3513227513227513, | |
| "grad_norm": 3.1391172409057617, | |
| "learning_rate": 3.2486772486772486e-05, | |
| "loss": 4.6797, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.3523809523809524, | |
| "grad_norm": 3.772940158843994, | |
| "learning_rate": 3.2433862433862436e-05, | |
| "loss": 4.4289, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.35343915343915344, | |
| "grad_norm": 6.232966423034668, | |
| "learning_rate": 3.2380952380952386e-05, | |
| "loss": 4.7493, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.3544973544973545, | |
| "grad_norm": 3.59843373298645, | |
| "learning_rate": 3.232804232804233e-05, | |
| "loss": 4.6664, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 4.1545538902282715, | |
| "learning_rate": 3.227513227513227e-05, | |
| "loss": 4.3846, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.3566137566137566, | |
| "grad_norm": 3.4982540607452393, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 4.5178, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.3576719576719577, | |
| "grad_norm": 3.8578543663024902, | |
| "learning_rate": 3.216931216931217e-05, | |
| "loss": 4.4057, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.35873015873015873, | |
| "grad_norm": 4.9274678230285645, | |
| "learning_rate": 3.2116402116402115e-05, | |
| "loss": 4.6315, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.35978835978835977, | |
| "grad_norm": 3.3767309188842773, | |
| "learning_rate": 3.2063492063492065e-05, | |
| "loss": 4.4462, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36084656084656086, | |
| "grad_norm": 5.290660858154297, | |
| "learning_rate": 3.2010582010582015e-05, | |
| "loss": 4.4087, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3619047619047619, | |
| "grad_norm": 3.6890292167663574, | |
| "learning_rate": 3.195767195767196e-05, | |
| "loss": 4.2977, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.362962962962963, | |
| "grad_norm": 4.656151294708252, | |
| "learning_rate": 3.19047619047619e-05, | |
| "loss": 4.2439, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.364021164021164, | |
| "grad_norm": 3.621307611465454, | |
| "learning_rate": 3.185185185185185e-05, | |
| "loss": 4.4836, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.36507936507936506, | |
| "grad_norm": 3.977259874343872, | |
| "learning_rate": 3.17989417989418e-05, | |
| "loss": 4.4694, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.36613756613756615, | |
| "grad_norm": 3.787940263748169, | |
| "learning_rate": 3.1746031746031745e-05, | |
| "loss": 4.3896, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3671957671957672, | |
| "grad_norm": 6.7594451904296875, | |
| "learning_rate": 3.1693121693121695e-05, | |
| "loss": 4.4299, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.3682539682539683, | |
| "grad_norm": 4.46245813369751, | |
| "learning_rate": 3.1640211640211645e-05, | |
| "loss": 4.9394, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.3693121693121693, | |
| "grad_norm": 4.181535720825195, | |
| "learning_rate": 3.158730158730159e-05, | |
| "loss": 4.6598, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.37037037037037035, | |
| "grad_norm": 2.9622275829315186, | |
| "learning_rate": 3.153439153439154e-05, | |
| "loss": 4.6615, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.37142857142857144, | |
| "grad_norm": 6.954951763153076, | |
| "learning_rate": 3.148148148148148e-05, | |
| "loss": 4.5004, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.3724867724867725, | |
| "grad_norm": 5.163333415985107, | |
| "learning_rate": 3.142857142857143e-05, | |
| "loss": 4.4917, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.37354497354497357, | |
| "grad_norm": 4.577300548553467, | |
| "learning_rate": 3.1375661375661374e-05, | |
| "loss": 4.5335, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3746031746031746, | |
| "grad_norm": 3.999112367630005, | |
| "learning_rate": 3.1322751322751324e-05, | |
| "loss": 4.4476, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.37566137566137564, | |
| "grad_norm": 4.00216007232666, | |
| "learning_rate": 3.1269841269841274e-05, | |
| "loss": 4.4549, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.37671957671957673, | |
| "grad_norm": 5.307511329650879, | |
| "learning_rate": 3.121693121693122e-05, | |
| "loss": 4.6019, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.37777777777777777, | |
| "grad_norm": 2.6185190677642822, | |
| "learning_rate": 3.116402116402117e-05, | |
| "loss": 4.5182, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.37883597883597886, | |
| "grad_norm": 4.905261039733887, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 4.4365, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.3798941798941799, | |
| "grad_norm": 3.4827020168304443, | |
| "learning_rate": 3.105820105820106e-05, | |
| "loss": 4.5379, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "grad_norm": 5.549962997436523, | |
| "learning_rate": 3.100529100529101e-05, | |
| "loss": 4.4842, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.382010582010582, | |
| "grad_norm": 3.405832052230835, | |
| "learning_rate": 3.095238095238095e-05, | |
| "loss": 4.4383, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.38306878306878306, | |
| "grad_norm": 4.3347673416137695, | |
| "learning_rate": 3.08994708994709e-05, | |
| "loss": 4.5793, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.38412698412698415, | |
| "grad_norm": 4.637452602386475, | |
| "learning_rate": 3.0846560846560846e-05, | |
| "loss": 4.4116, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3851851851851852, | |
| "grad_norm": 3.653627395629883, | |
| "learning_rate": 3.0793650793650796e-05, | |
| "loss": 4.608, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3862433862433862, | |
| "grad_norm": 3.076195240020752, | |
| "learning_rate": 3.074074074074074e-05, | |
| "loss": 4.4001, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3873015873015873, | |
| "grad_norm": 5.166315078735352, | |
| "learning_rate": 3.068783068783069e-05, | |
| "loss": 4.1011, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.38835978835978835, | |
| "grad_norm": 6.1348795890808105, | |
| "learning_rate": 3.063492063492064e-05, | |
| "loss": 4.6797, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.38941798941798944, | |
| "grad_norm": 2.963944673538208, | |
| "learning_rate": 3.058201058201058e-05, | |
| "loss": 4.3261, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.3904761904761905, | |
| "grad_norm": 4.195929527282715, | |
| "learning_rate": 3.052910052910053e-05, | |
| "loss": 4.5108, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.3915343915343915, | |
| "grad_norm": 4.026662349700928, | |
| "learning_rate": 3.0476190476190482e-05, | |
| "loss": 4.2837, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3925925925925926, | |
| "grad_norm": 4.093654155731201, | |
| "learning_rate": 3.0423280423280425e-05, | |
| "loss": 4.6069, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.39365079365079364, | |
| "grad_norm": 5.716070652008057, | |
| "learning_rate": 3.037037037037037e-05, | |
| "loss": 4.4523, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.39470899470899473, | |
| "grad_norm": 3.701359748840332, | |
| "learning_rate": 3.0317460317460318e-05, | |
| "loss": 4.4073, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.39576719576719577, | |
| "grad_norm": 7.2860260009765625, | |
| "learning_rate": 3.0264550264550268e-05, | |
| "loss": 4.3031, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3968253968253968, | |
| "grad_norm": 6.570174217224121, | |
| "learning_rate": 3.0211640211640215e-05, | |
| "loss": 4.4551, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.3978835978835979, | |
| "grad_norm": 4.895565509796143, | |
| "learning_rate": 3.0158730158730158e-05, | |
| "loss": 4.4219, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.39894179894179893, | |
| "grad_norm": 7.9695844650268555, | |
| "learning_rate": 3.010582010582011e-05, | |
| "loss": 4.4131, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 4.441410064697266, | |
| "learning_rate": 3.0052910052910054e-05, | |
| "loss": 4.4745, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.40105820105820106, | |
| "grad_norm": 3.7005107402801514, | |
| "learning_rate": 3e-05, | |
| "loss": 4.3727, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.4021164021164021, | |
| "grad_norm": 5.960515975952148, | |
| "learning_rate": 2.9947089947089947e-05, | |
| "loss": 4.5026, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4031746031746032, | |
| "grad_norm": 3.2702596187591553, | |
| "learning_rate": 2.9894179894179897e-05, | |
| "loss": 4.5372, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.4042328042328042, | |
| "grad_norm": 4.725318908691406, | |
| "learning_rate": 2.9841269841269844e-05, | |
| "loss": 4.4181, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.4052910052910053, | |
| "grad_norm": 6.9476847648620605, | |
| "learning_rate": 2.9788359788359787e-05, | |
| "loss": 4.2138, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.40634920634920635, | |
| "grad_norm": 3.7959463596343994, | |
| "learning_rate": 2.973544973544974e-05, | |
| "loss": 4.4974, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.4074074074074074, | |
| "grad_norm": 2.9990975856781006, | |
| "learning_rate": 2.9682539682539683e-05, | |
| "loss": 4.4096, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.4084656084656085, | |
| "grad_norm": 6.560277462005615, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 4.9725, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4095238095238095, | |
| "grad_norm": 3.6386265754699707, | |
| "learning_rate": 2.957671957671958e-05, | |
| "loss": 4.5124, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4105820105820106, | |
| "grad_norm": 3.7434399127960205, | |
| "learning_rate": 2.9523809523809526e-05, | |
| "loss": 4.308, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.41164021164021164, | |
| "grad_norm": 4.926546096801758, | |
| "learning_rate": 2.9470899470899473e-05, | |
| "loss": 4.44, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4126984126984127, | |
| "grad_norm": 4.1928510665893555, | |
| "learning_rate": 2.9417989417989416e-05, | |
| "loss": 4.588, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.41375661375661377, | |
| "grad_norm": 3.4188411235809326, | |
| "learning_rate": 2.9365079365079366e-05, | |
| "loss": 4.3135, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.4148148148148148, | |
| "grad_norm": 4.299923896789551, | |
| "learning_rate": 2.9312169312169313e-05, | |
| "loss": 4.4664, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4158730158730159, | |
| "grad_norm": 5.515903472900391, | |
| "learning_rate": 2.925925925925926e-05, | |
| "loss": 4.5932, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.41693121693121693, | |
| "grad_norm": 4.561779022216797, | |
| "learning_rate": 2.920634920634921e-05, | |
| "loss": 4.4384, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.41798941798941797, | |
| "grad_norm": 4.012057781219482, | |
| "learning_rate": 2.9153439153439156e-05, | |
| "loss": 4.3051, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.41904761904761906, | |
| "grad_norm": 4.529195785522461, | |
| "learning_rate": 2.91005291005291e-05, | |
| "loss": 4.0737, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.4201058201058201, | |
| "grad_norm": 4.308627128601074, | |
| "learning_rate": 2.9047619047619052e-05, | |
| "loss": 4.2985, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.4211640211640212, | |
| "grad_norm": 3.731900930404663, | |
| "learning_rate": 2.8994708994708995e-05, | |
| "loss": 4.5682, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4222222222222222, | |
| "grad_norm": 3.0067977905273438, | |
| "learning_rate": 2.8941798941798942e-05, | |
| "loss": 4.4598, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.42328042328042326, | |
| "grad_norm": 5.660052299499512, | |
| "learning_rate": 2.8888888888888888e-05, | |
| "loss": 4.2262, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.42433862433862435, | |
| "grad_norm": 3.870854139328003, | |
| "learning_rate": 2.8835978835978838e-05, | |
| "loss": 4.4143, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.4253968253968254, | |
| "grad_norm": 4.174057483673096, | |
| "learning_rate": 2.8783068783068785e-05, | |
| "loss": 4.3944, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.4264550264550265, | |
| "grad_norm": 3.0973563194274902, | |
| "learning_rate": 2.8730158730158728e-05, | |
| "loss": 4.6079, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4275132275132275, | |
| "grad_norm": 3.542325735092163, | |
| "learning_rate": 2.867724867724868e-05, | |
| "loss": 4.5103, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 5.6434712409973145, | |
| "learning_rate": 2.8624338624338624e-05, | |
| "loss": 4.6433, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.42962962962962964, | |
| "grad_norm": 4.372058868408203, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 4.4411, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4306878306878307, | |
| "grad_norm": 4.075821876525879, | |
| "learning_rate": 2.851851851851852e-05, | |
| "loss": 4.5535, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.43174603174603177, | |
| "grad_norm": 3.91575288772583, | |
| "learning_rate": 2.8465608465608467e-05, | |
| "loss": 4.697, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4328042328042328, | |
| "grad_norm": 5.666733741760254, | |
| "learning_rate": 2.8412698412698414e-05, | |
| "loss": 4.4831, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.43386243386243384, | |
| "grad_norm": 6.003392696380615, | |
| "learning_rate": 2.8359788359788357e-05, | |
| "loss": 4.1948, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.43492063492063493, | |
| "grad_norm": 3.3927369117736816, | |
| "learning_rate": 2.830687830687831e-05, | |
| "loss": 4.361, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.43597883597883597, | |
| "grad_norm": 4.443639278411865, | |
| "learning_rate": 2.8253968253968253e-05, | |
| "loss": 4.1115, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.43703703703703706, | |
| "grad_norm": 3.2199268341064453, | |
| "learning_rate": 2.82010582010582e-05, | |
| "loss": 4.5649, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.4380952380952381, | |
| "grad_norm": 5.219230651855469, | |
| "learning_rate": 2.814814814814815e-05, | |
| "loss": 4.3149, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.43915343915343913, | |
| "grad_norm": 4.251458644866943, | |
| "learning_rate": 2.8095238095238096e-05, | |
| "loss": 4.5838, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.4402116402116402, | |
| "grad_norm": 3.8369529247283936, | |
| "learning_rate": 2.8042328042328043e-05, | |
| "loss": 4.6192, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.44126984126984126, | |
| "grad_norm": 5.307433128356934, | |
| "learning_rate": 2.7989417989417993e-05, | |
| "loss": 4.434, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.44232804232804235, | |
| "grad_norm": 4.317311763763428, | |
| "learning_rate": 2.793650793650794e-05, | |
| "loss": 4.4022, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4433862433862434, | |
| "grad_norm": 4.3074259757995605, | |
| "learning_rate": 2.7883597883597883e-05, | |
| "loss": 4.5237, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 5.363450050354004, | |
| "learning_rate": 2.783068783068783e-05, | |
| "loss": 4.6406, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4455026455026455, | |
| "grad_norm": 4.091902256011963, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 4.3964, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.44656084656084655, | |
| "grad_norm": 4.199675559997559, | |
| "learning_rate": 2.7724867724867726e-05, | |
| "loss": 4.5985, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.44761904761904764, | |
| "grad_norm": 3.6524128913879395, | |
| "learning_rate": 2.7671957671957672e-05, | |
| "loss": 4.7435, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4486772486772487, | |
| "grad_norm": 3.6887259483337402, | |
| "learning_rate": 2.7619047619047622e-05, | |
| "loss": 4.5237, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.4497354497354497, | |
| "grad_norm": 5.04093599319458, | |
| "learning_rate": 2.756613756613757e-05, | |
| "loss": 4.3478, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4507936507936508, | |
| "grad_norm": 4.271005153656006, | |
| "learning_rate": 2.7513227513227512e-05, | |
| "loss": 4.2349, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.45185185185185184, | |
| "grad_norm": 4.623360633850098, | |
| "learning_rate": 2.7460317460317465e-05, | |
| "loss": 4.0299, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.45291005291005293, | |
| "grad_norm": 3.6036221981048584, | |
| "learning_rate": 2.7407407407407408e-05, | |
| "loss": 4.5202, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.45396825396825397, | |
| "grad_norm": 5.633344650268555, | |
| "learning_rate": 2.7354497354497355e-05, | |
| "loss": 4.4543, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.455026455026455, | |
| "grad_norm": 5.114171504974365, | |
| "learning_rate": 2.73015873015873e-05, | |
| "loss": 4.4791, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4560846560846561, | |
| "grad_norm": 4.855916500091553, | |
| "learning_rate": 2.724867724867725e-05, | |
| "loss": 4.4991, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 5.824271202087402, | |
| "learning_rate": 2.7195767195767198e-05, | |
| "loss": 4.248, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4582010582010582, | |
| "grad_norm": 4.154948711395264, | |
| "learning_rate": 2.714285714285714e-05, | |
| "loss": 4.2861, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.45925925925925926, | |
| "grad_norm": 3.779582977294922, | |
| "learning_rate": 2.7089947089947094e-05, | |
| "loss": 4.2374, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4603174603174603, | |
| "grad_norm": 2.884605884552002, | |
| "learning_rate": 2.7037037037037037e-05, | |
| "loss": 4.2192, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4613756613756614, | |
| "grad_norm": 4.426102638244629, | |
| "learning_rate": 2.6984126984126984e-05, | |
| "loss": 4.23, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.4624338624338624, | |
| "grad_norm": 4.26328706741333, | |
| "learning_rate": 2.6931216931216934e-05, | |
| "loss": 4.4972, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.4634920634920635, | |
| "grad_norm": 4.900135517120361, | |
| "learning_rate": 2.687830687830688e-05, | |
| "loss": 4.3481, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.46455026455026455, | |
| "grad_norm": 3.687103033065796, | |
| "learning_rate": 2.6825396825396827e-05, | |
| "loss": 4.5606, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.4656084656084656, | |
| "grad_norm": 5.069578170776367, | |
| "learning_rate": 2.677248677248677e-05, | |
| "loss": 4.3747, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.4666666666666667, | |
| "grad_norm": 3.736144542694092, | |
| "learning_rate": 2.6719576719576723e-05, | |
| "loss": 4.4117, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.4677248677248677, | |
| "grad_norm": 3.2292275428771973, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 4.6203, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.4687830687830688, | |
| "grad_norm": 4.4302778244018555, | |
| "learning_rate": 2.6613756613756613e-05, | |
| "loss": 4.3818, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.46984126984126984, | |
| "grad_norm": 5.112982273101807, | |
| "learning_rate": 2.6560846560846563e-05, | |
| "loss": 4.2857, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.4708994708994709, | |
| "grad_norm": 4.428035736083984, | |
| "learning_rate": 2.650793650793651e-05, | |
| "loss": 4.2864, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.47195767195767196, | |
| "grad_norm": 2.9887120723724365, | |
| "learning_rate": 2.6455026455026456e-05, | |
| "loss": 4.4141, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.473015873015873, | |
| "grad_norm": 4.006242275238037, | |
| "learning_rate": 2.6402116402116406e-05, | |
| "loss": 4.4684, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.4740740740740741, | |
| "grad_norm": 6.761603355407715, | |
| "learning_rate": 2.6349206349206353e-05, | |
| "loss": 3.7916, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.47513227513227513, | |
| "grad_norm": 4.260864734649658, | |
| "learning_rate": 2.6296296296296296e-05, | |
| "loss": 4.5701, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "grad_norm": 4.10745906829834, | |
| "learning_rate": 2.6243386243386242e-05, | |
| "loss": 4.5473, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.47724867724867726, | |
| "grad_norm": 5.057222843170166, | |
| "learning_rate": 2.6190476190476192e-05, | |
| "loss": 4.0021, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.4783068783068783, | |
| "grad_norm": 3.276907205581665, | |
| "learning_rate": 2.613756613756614e-05, | |
| "loss": 4.3013, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.4793650793650794, | |
| "grad_norm": 4.663476467132568, | |
| "learning_rate": 2.6084656084656085e-05, | |
| "loss": 4.0696, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.4804232804232804, | |
| "grad_norm": 3.4896063804626465, | |
| "learning_rate": 2.6031746031746035e-05, | |
| "loss": 4.5279, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.48148148148148145, | |
| "grad_norm": 4.717653274536133, | |
| "learning_rate": 2.5978835978835982e-05, | |
| "loss": 4.256, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.48253968253968255, | |
| "grad_norm": 2.9412264823913574, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 4.5379, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.4835978835978836, | |
| "grad_norm": 5.3735671043396, | |
| "learning_rate": 2.5873015873015878e-05, | |
| "loss": 4.5428, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.4846560846560847, | |
| "grad_norm": 3.9320805072784424, | |
| "learning_rate": 2.582010582010582e-05, | |
| "loss": 4.2526, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.4857142857142857, | |
| "grad_norm": 5.301612854003906, | |
| "learning_rate": 2.5767195767195768e-05, | |
| "loss": 4.3464, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.48677248677248675, | |
| "grad_norm": 6.028964996337891, | |
| "learning_rate": 2.5714285714285714e-05, | |
| "loss": 4.2735, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.48783068783068784, | |
| "grad_norm": 3.6883628368377686, | |
| "learning_rate": 2.5661375661375664e-05, | |
| "loss": 4.2878, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.4888888888888889, | |
| "grad_norm": 3.1534693241119385, | |
| "learning_rate": 2.560846560846561e-05, | |
| "loss": 4.6231, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.48994708994708996, | |
| "grad_norm": 3.1988255977630615, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 4.4659, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.491005291005291, | |
| "grad_norm": 6.167055606842041, | |
| "learning_rate": 2.5502645502645507e-05, | |
| "loss": 4.0693, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.49206349206349204, | |
| "grad_norm": 4.961990833282471, | |
| "learning_rate": 2.544973544973545e-05, | |
| "loss": 4.6526, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4931216931216931, | |
| "grad_norm": 4.159661293029785, | |
| "learning_rate": 2.5396825396825397e-05, | |
| "loss": 4.3308, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.49417989417989416, | |
| "grad_norm": 3.0661191940307617, | |
| "learning_rate": 2.5343915343915347e-05, | |
| "loss": 4.3929, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.49523809523809526, | |
| "grad_norm": 3.240173816680908, | |
| "learning_rate": 2.5291005291005294e-05, | |
| "loss": 4.3422, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.4962962962962963, | |
| "grad_norm": 3.9534902572631836, | |
| "learning_rate": 2.523809523809524e-05, | |
| "loss": 4.4378, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.4973544973544973, | |
| "grad_norm": 4.109508037567139, | |
| "learning_rate": 2.5185185185185183e-05, | |
| "loss": 4.313, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.4984126984126984, | |
| "grad_norm": 3.0627992153167725, | |
| "learning_rate": 2.5132275132275137e-05, | |
| "loss": 4.4831, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.49947089947089945, | |
| "grad_norm": 3.873530149459839, | |
| "learning_rate": 2.507936507936508e-05, | |
| "loss": 4.4164, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5005291005291005, | |
| "grad_norm": 3.79267954826355, | |
| "learning_rate": 2.5026455026455026e-05, | |
| "loss": 4.4306, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5015873015873016, | |
| "grad_norm": 4.215601921081543, | |
| "learning_rate": 2.4973544973544973e-05, | |
| "loss": 4.4788, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5026455026455027, | |
| "grad_norm": 3.260300874710083, | |
| "learning_rate": 2.4920634920634923e-05, | |
| "loss": 4.3694, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5037037037037037, | |
| "grad_norm": 4.4810404777526855, | |
| "learning_rate": 2.4867724867724866e-05, | |
| "loss": 4.4525, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5047619047619047, | |
| "grad_norm": 4.280769348144531, | |
| "learning_rate": 2.4814814814814816e-05, | |
| "loss": 4.2847, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5058201058201058, | |
| "grad_norm": 5.8798298835754395, | |
| "learning_rate": 2.4761904761904762e-05, | |
| "loss": 4.3589, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.5068783068783069, | |
| "grad_norm": 4.679714679718018, | |
| "learning_rate": 2.470899470899471e-05, | |
| "loss": 4.3346, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5079365079365079, | |
| "grad_norm": 7.5125322341918945, | |
| "learning_rate": 2.465608465608466e-05, | |
| "loss": 4.2475, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.508994708994709, | |
| "grad_norm": 5.064920425415039, | |
| "learning_rate": 2.4603174603174602e-05, | |
| "loss": 4.2715, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5100529100529101, | |
| "grad_norm": 3.553298234939575, | |
| "learning_rate": 2.4550264550264552e-05, | |
| "loss": 4.3644, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5111111111111111, | |
| "grad_norm": 5.128535747528076, | |
| "learning_rate": 2.44973544973545e-05, | |
| "loss": 4.5012, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5121693121693122, | |
| "grad_norm": 6.287095546722412, | |
| "learning_rate": 2.4444444444444445e-05, | |
| "loss": 4.3774, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5132275132275133, | |
| "grad_norm": 6.201144218444824, | |
| "learning_rate": 2.439153439153439e-05, | |
| "loss": 4.542, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5142857142857142, | |
| "grad_norm": 5.993530750274658, | |
| "learning_rate": 2.4338624338624338e-05, | |
| "loss": 4.6664, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5153439153439153, | |
| "grad_norm": 5.866735458374023, | |
| "learning_rate": 2.4285714285714288e-05, | |
| "loss": 4.4555, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5164021164021164, | |
| "grad_norm": 4.489416599273682, | |
| "learning_rate": 2.4232804232804234e-05, | |
| "loss": 4.5308, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5174603174603175, | |
| "grad_norm": 6.804817199707031, | |
| "learning_rate": 2.417989417989418e-05, | |
| "loss": 4.2786, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5185185185185185, | |
| "grad_norm": 4.931401252746582, | |
| "learning_rate": 2.4126984126984128e-05, | |
| "loss": 4.3546, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5195767195767196, | |
| "grad_norm": 4.534653186798096, | |
| "learning_rate": 2.4074074074074074e-05, | |
| "loss": 4.4706, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5206349206349207, | |
| "grad_norm": 3.931093692779541, | |
| "learning_rate": 2.402116402116402e-05, | |
| "loss": 4.2993, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5216931216931217, | |
| "grad_norm": 4.178028106689453, | |
| "learning_rate": 2.396825396825397e-05, | |
| "loss": 4.3037, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5227513227513227, | |
| "grad_norm": 3.1427457332611084, | |
| "learning_rate": 2.3915343915343917e-05, | |
| "loss": 4.377, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5238095238095238, | |
| "grad_norm": 4.532064437866211, | |
| "learning_rate": 2.3862433862433864e-05, | |
| "loss": 4.5601, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5248677248677248, | |
| "grad_norm": 5.367987155914307, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 4.3662, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5259259259259259, | |
| "grad_norm": 4.349072456359863, | |
| "learning_rate": 2.3756613756613757e-05, | |
| "loss": 4.2585, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.526984126984127, | |
| "grad_norm": 4.508454322814941, | |
| "learning_rate": 2.3703703703703707e-05, | |
| "loss": 4.3033, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5280423280423281, | |
| "grad_norm": 3.51326060295105, | |
| "learning_rate": 2.365079365079365e-05, | |
| "loss": 4.3074, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5291005291005291, | |
| "grad_norm": 4.635214328765869, | |
| "learning_rate": 2.35978835978836e-05, | |
| "loss": 4.5061, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5301587301587302, | |
| "grad_norm": 7.175034523010254, | |
| "learning_rate": 2.3544973544973546e-05, | |
| "loss": 4.5246, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5312169312169313, | |
| "grad_norm": 7.556386947631836, | |
| "learning_rate": 2.3492063492063493e-05, | |
| "loss": 4.5628, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5322751322751322, | |
| "grad_norm": 5.281272888183594, | |
| "learning_rate": 2.3439153439153443e-05, | |
| "loss": 4.5511, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 4.228816986083984, | |
| "learning_rate": 2.3386243386243386e-05, | |
| "loss": 4.7682, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.5343915343915344, | |
| "grad_norm": 4.464179992675781, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 4.2103, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5354497354497354, | |
| "grad_norm": 2.9484353065490723, | |
| "learning_rate": 2.328042328042328e-05, | |
| "loss": 4.3896, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5365079365079365, | |
| "grad_norm": 3.5408449172973633, | |
| "learning_rate": 2.322751322751323e-05, | |
| "loss": 4.5458, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.5375661375661376, | |
| "grad_norm": 4.470260143280029, | |
| "learning_rate": 2.3174603174603175e-05, | |
| "loss": 4.3186, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5386243386243387, | |
| "grad_norm": 4.546544551849365, | |
| "learning_rate": 2.3121693121693122e-05, | |
| "loss": 4.6774, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5396825396825397, | |
| "grad_norm": 3.3700125217437744, | |
| "learning_rate": 2.3068783068783072e-05, | |
| "loss": 4.3627, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5407407407407407, | |
| "grad_norm": 4.171989440917969, | |
| "learning_rate": 2.3015873015873015e-05, | |
| "loss": 4.5353, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5417989417989418, | |
| "grad_norm": 3.360245943069458, | |
| "learning_rate": 2.2962962962962965e-05, | |
| "loss": 4.2469, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5428571428571428, | |
| "grad_norm": 4.016007900238037, | |
| "learning_rate": 2.291005291005291e-05, | |
| "loss": 4.7331, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5439153439153439, | |
| "grad_norm": 4.515334606170654, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 4.1892, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.544973544973545, | |
| "grad_norm": 5.877944469451904, | |
| "learning_rate": 2.2804232804232805e-05, | |
| "loss": 3.928, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.546031746031746, | |
| "grad_norm": 4.498457908630371, | |
| "learning_rate": 2.275132275132275e-05, | |
| "loss": 4.667, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5470899470899471, | |
| "grad_norm": 4.965441703796387, | |
| "learning_rate": 2.2698412698412698e-05, | |
| "loss": 4.2019, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.5481481481481482, | |
| "grad_norm": 3.545275926589966, | |
| "learning_rate": 2.2645502645502648e-05, | |
| "loss": 4.3709, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5492063492063493, | |
| "grad_norm": 4.3779191970825195, | |
| "learning_rate": 2.2592592592592594e-05, | |
| "loss": 4.4913, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5502645502645502, | |
| "grad_norm": 4.215112209320068, | |
| "learning_rate": 2.253968253968254e-05, | |
| "loss": 4.5574, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5513227513227513, | |
| "grad_norm": 5.649901866912842, | |
| "learning_rate": 2.2486772486772487e-05, | |
| "loss": 4.2828, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5523809523809524, | |
| "grad_norm": 3.5006558895111084, | |
| "learning_rate": 2.2433862433862434e-05, | |
| "loss": 4.4051, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5534391534391534, | |
| "grad_norm": 3.895487070083618, | |
| "learning_rate": 2.2380952380952384e-05, | |
| "loss": 4.4244, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5544973544973545, | |
| "grad_norm": 3.6373565196990967, | |
| "learning_rate": 2.2328042328042327e-05, | |
| "loss": 4.363, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 3.6034340858459473, | |
| "learning_rate": 2.2275132275132277e-05, | |
| "loss": 4.5101, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5566137566137566, | |
| "grad_norm": 6.335474967956543, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 4.2665, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5576719576719577, | |
| "grad_norm": 3.84147047996521, | |
| "learning_rate": 2.216931216931217e-05, | |
| "loss": 4.0104, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5587301587301587, | |
| "grad_norm": 5.233031272888184, | |
| "learning_rate": 2.211640211640212e-05, | |
| "loss": 4.6061, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5597883597883598, | |
| "grad_norm": 5.9673638343811035, | |
| "learning_rate": 2.2063492063492063e-05, | |
| "loss": 4.3624, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.5608465608465608, | |
| "grad_norm": 6.370858669281006, | |
| "learning_rate": 2.2010582010582013e-05, | |
| "loss": 4.1914, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5619047619047619, | |
| "grad_norm": 4.488510608673096, | |
| "learning_rate": 2.1957671957671956e-05, | |
| "loss": 4.4786, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.562962962962963, | |
| "grad_norm": 5.369960308074951, | |
| "learning_rate": 2.1904761904761906e-05, | |
| "loss": 4.4175, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.564021164021164, | |
| "grad_norm": 4.92180061340332, | |
| "learning_rate": 2.1851851851851852e-05, | |
| "loss": 4.4534, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5650793650793651, | |
| "grad_norm": 4.838220596313477, | |
| "learning_rate": 2.17989417989418e-05, | |
| "loss": 4.5532, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5661375661375662, | |
| "grad_norm": 5.853415489196777, | |
| "learning_rate": 2.174603174603175e-05, | |
| "loss": 4.1377, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5671957671957671, | |
| "grad_norm": 4.8340349197387695, | |
| "learning_rate": 2.1693121693121692e-05, | |
| "loss": 4.1588, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5682539682539682, | |
| "grad_norm": 3.3916378021240234, | |
| "learning_rate": 2.1640211640211642e-05, | |
| "loss": 4.3666, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5693121693121693, | |
| "grad_norm": 3.867189407348633, | |
| "learning_rate": 2.158730158730159e-05, | |
| "loss": 4.2795, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.5703703703703704, | |
| "grad_norm": 4.104162693023682, | |
| "learning_rate": 2.1534391534391535e-05, | |
| "loss": 4.3084, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 3.797863721847534, | |
| "learning_rate": 2.148148148148148e-05, | |
| "loss": 4.3259, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5724867724867725, | |
| "grad_norm": 6.428219795227051, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 4.502, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5735449735449736, | |
| "grad_norm": 3.7687265872955322, | |
| "learning_rate": 2.1375661375661378e-05, | |
| "loss": 4.2587, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5746031746031746, | |
| "grad_norm": 4.52725887298584, | |
| "learning_rate": 2.1322751322751325e-05, | |
| "loss": 4.6748, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5756613756613757, | |
| "grad_norm": 7.533745288848877, | |
| "learning_rate": 2.126984126984127e-05, | |
| "loss": 4.4511, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5767195767195767, | |
| "grad_norm": 6.036813735961914, | |
| "learning_rate": 2.1216931216931218e-05, | |
| "loss": 4.6686, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5777777777777777, | |
| "grad_norm": 5.2509331703186035, | |
| "learning_rate": 2.1164021164021164e-05, | |
| "loss": 4.1922, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5788359788359788, | |
| "grad_norm": 4.836385726928711, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 4.3825, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.5798941798941799, | |
| "grad_norm": 3.305849075317383, | |
| "learning_rate": 2.105820105820106e-05, | |
| "loss": 4.4511, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.580952380952381, | |
| "grad_norm": 6.00985050201416, | |
| "learning_rate": 2.1005291005291007e-05, | |
| "loss": 4.6058, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.582010582010582, | |
| "grad_norm": 3.1658153533935547, | |
| "learning_rate": 2.0952380952380954e-05, | |
| "loss": 4.4339, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5830687830687831, | |
| "grad_norm": 3.461515426635742, | |
| "learning_rate": 2.08994708994709e-05, | |
| "loss": 4.1928, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5841269841269842, | |
| "grad_norm": 3.686345100402832, | |
| "learning_rate": 2.0846560846560847e-05, | |
| "loss": 4.2196, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5851851851851851, | |
| "grad_norm": 4.964058876037598, | |
| "learning_rate": 2.0793650793650797e-05, | |
| "loss": 4.4196, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5862433862433862, | |
| "grad_norm": 3.2014639377593994, | |
| "learning_rate": 2.074074074074074e-05, | |
| "loss": 4.3415, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5873015873015873, | |
| "grad_norm": 5.361993789672852, | |
| "learning_rate": 2.068783068783069e-05, | |
| "loss": 4.3946, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5883597883597883, | |
| "grad_norm": 6.569308757781982, | |
| "learning_rate": 2.0634920634920636e-05, | |
| "loss": 4.1728, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5894179894179894, | |
| "grad_norm": 4.793816089630127, | |
| "learning_rate": 2.0582010582010583e-05, | |
| "loss": 4.5882, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5904761904761905, | |
| "grad_norm": 3.3498852252960205, | |
| "learning_rate": 2.052910052910053e-05, | |
| "loss": 4.3371, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.5915343915343916, | |
| "grad_norm": 3.4059863090515137, | |
| "learning_rate": 2.0476190476190476e-05, | |
| "loss": 4.384, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5925925925925926, | |
| "grad_norm": 5.030102729797363, | |
| "learning_rate": 2.0423280423280426e-05, | |
| "loss": 4.3385, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5936507936507937, | |
| "grad_norm": 3.9506351947784424, | |
| "learning_rate": 2.037037037037037e-05, | |
| "loss": 4.3949, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.5947089947089947, | |
| "grad_norm": 4.350306034088135, | |
| "learning_rate": 2.031746031746032e-05, | |
| "loss": 4.2672, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5957671957671957, | |
| "grad_norm": 3.6148154735565186, | |
| "learning_rate": 2.0264550264550266e-05, | |
| "loss": 4.3542, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5968253968253968, | |
| "grad_norm": 4.015986442565918, | |
| "learning_rate": 2.0211640211640212e-05, | |
| "loss": 4.3468, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5978835978835979, | |
| "grad_norm": 4.623772621154785, | |
| "learning_rate": 2.015873015873016e-05, | |
| "loss": 4.3602, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5989417989417989, | |
| "grad_norm": 4.602967739105225, | |
| "learning_rate": 2.0105820105820105e-05, | |
| "loss": 4.2878, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 4.3940911293029785, | |
| "learning_rate": 2.0052910052910055e-05, | |
| "loss": 4.4939, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6010582010582011, | |
| "grad_norm": 6.775349140167236, | |
| "learning_rate": 2e-05, | |
| "loss": 4.2697, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6021164021164022, | |
| "grad_norm": 3.592540979385376, | |
| "learning_rate": 1.9947089947089948e-05, | |
| "loss": 4.3831, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6031746031746031, | |
| "grad_norm": 3.6361138820648193, | |
| "learning_rate": 1.9894179894179895e-05, | |
| "loss": 4.459, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6042328042328042, | |
| "grad_norm": 5.545779228210449, | |
| "learning_rate": 1.984126984126984e-05, | |
| "loss": 4.5091, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6052910052910053, | |
| "grad_norm": 4.679067134857178, | |
| "learning_rate": 1.9788359788359788e-05, | |
| "loss": 4.1179, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.6063492063492063, | |
| "grad_norm": 4.026285171508789, | |
| "learning_rate": 1.9735449735449738e-05, | |
| "loss": 4.4972, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6074074074074074, | |
| "grad_norm": 4.826168060302734, | |
| "learning_rate": 1.9682539682539684e-05, | |
| "loss": 4.326, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6084656084656085, | |
| "grad_norm": 4.132863521575928, | |
| "learning_rate": 1.962962962962963e-05, | |
| "loss": 4.4658, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.6095238095238096, | |
| "grad_norm": 3.1971607208251953, | |
| "learning_rate": 1.9576719576719577e-05, | |
| "loss": 4.3489, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6105820105820106, | |
| "grad_norm": 4.324203014373779, | |
| "learning_rate": 1.9523809523809524e-05, | |
| "loss": 4.5387, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6116402116402117, | |
| "grad_norm": 4.452277660369873, | |
| "learning_rate": 1.9470899470899474e-05, | |
| "loss": 4.371, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6126984126984127, | |
| "grad_norm": 5.406804084777832, | |
| "learning_rate": 1.9417989417989417e-05, | |
| "loss": 4.3936, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6137566137566137, | |
| "grad_norm": 3.683032751083374, | |
| "learning_rate": 1.9365079365079367e-05, | |
| "loss": 4.5795, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6148148148148148, | |
| "grad_norm": 4.9315667152404785, | |
| "learning_rate": 1.9312169312169313e-05, | |
| "loss": 4.3404, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6158730158730159, | |
| "grad_norm": 3.669973611831665, | |
| "learning_rate": 1.925925925925926e-05, | |
| "loss": 4.479, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6169312169312169, | |
| "grad_norm": 6.914597034454346, | |
| "learning_rate": 1.920634920634921e-05, | |
| "loss": 5.2586, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.617989417989418, | |
| "grad_norm": 6.880323886871338, | |
| "learning_rate": 1.9153439153439153e-05, | |
| "loss": 4.2503, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6190476190476191, | |
| "grad_norm": 3.348428726196289, | |
| "learning_rate": 1.9100529100529103e-05, | |
| "loss": 4.2295, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.6201058201058202, | |
| "grad_norm": 3.1387596130371094, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 4.437, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6211640211640211, | |
| "grad_norm": 3.1645758152008057, | |
| "learning_rate": 1.8994708994708996e-05, | |
| "loss": 4.3887, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 3.8095288276672363, | |
| "learning_rate": 1.8941798941798943e-05, | |
| "loss": 4.3497, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6232804232804233, | |
| "grad_norm": 3.393749237060547, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 4.2098, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6243386243386243, | |
| "grad_norm": 5.955323696136475, | |
| "learning_rate": 1.883597883597884e-05, | |
| "loss": 4.3409, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6253968253968254, | |
| "grad_norm": 3.0141565799713135, | |
| "learning_rate": 1.8783068783068782e-05, | |
| "loss": 4.35, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.6264550264550265, | |
| "grad_norm": 5.6626176834106445, | |
| "learning_rate": 1.8730158730158732e-05, | |
| "loss": 4.3674, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6275132275132275, | |
| "grad_norm": 3.7289211750030518, | |
| "learning_rate": 1.867724867724868e-05, | |
| "loss": 4.2095, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6285714285714286, | |
| "grad_norm": 4.1014862060546875, | |
| "learning_rate": 1.8624338624338625e-05, | |
| "loss": 4.2905, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6296296296296297, | |
| "grad_norm": 5.644645690917969, | |
| "learning_rate": 1.8571428571428572e-05, | |
| "loss": 4.2984, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6306878306878307, | |
| "grad_norm": 5.378518104553223, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 4.1897, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6317460317460317, | |
| "grad_norm": 4.600176811218262, | |
| "learning_rate": 1.8465608465608468e-05, | |
| "loss": 4.2564, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6328042328042328, | |
| "grad_norm": 6.3858137130737305, | |
| "learning_rate": 1.8412698412698415e-05, | |
| "loss": 4.1599, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6338624338624339, | |
| "grad_norm": 4.172644138336182, | |
| "learning_rate": 1.835978835978836e-05, | |
| "loss": 4.2698, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6349206349206349, | |
| "grad_norm": 4.711319923400879, | |
| "learning_rate": 1.8306878306878308e-05, | |
| "loss": 4.4446, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.635978835978836, | |
| "grad_norm": 4.155757427215576, | |
| "learning_rate": 1.8253968253968254e-05, | |
| "loss": 4.4898, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.6370370370370371, | |
| "grad_norm": 4.684317588806152, | |
| "learning_rate": 1.82010582010582e-05, | |
| "loss": 4.2174, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.638095238095238, | |
| "grad_norm": 4.73888635635376, | |
| "learning_rate": 1.814814814814815e-05, | |
| "loss": 4.397, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6391534391534391, | |
| "grad_norm": 4.162045478820801, | |
| "learning_rate": 1.8095238095238094e-05, | |
| "loss": 4.4202, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.6402116402116402, | |
| "grad_norm": 6.125421524047852, | |
| "learning_rate": 1.8042328042328044e-05, | |
| "loss": 4.2264, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6412698412698413, | |
| "grad_norm": 5.661704063415527, | |
| "learning_rate": 1.798941798941799e-05, | |
| "loss": 4.0215, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6423280423280423, | |
| "grad_norm": 6.217195510864258, | |
| "learning_rate": 1.7936507936507937e-05, | |
| "loss": 4.2445, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6433862433862434, | |
| "grad_norm": 4.448593616485596, | |
| "learning_rate": 1.7883597883597887e-05, | |
| "loss": 3.9882, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6444444444444445, | |
| "grad_norm": 6.415726184844971, | |
| "learning_rate": 1.783068783068783e-05, | |
| "loss": 4.4289, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6455026455026455, | |
| "grad_norm": 4.124817848205566, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 4.3764, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6465608465608466, | |
| "grad_norm": 4.285531520843506, | |
| "learning_rate": 1.7724867724867723e-05, | |
| "loss": 4.4348, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6476190476190476, | |
| "grad_norm": 4.148486614227295, | |
| "learning_rate": 1.7671957671957673e-05, | |
| "loss": 4.448, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6486772486772486, | |
| "grad_norm": 6.1382060050964355, | |
| "learning_rate": 1.761904761904762e-05, | |
| "loss": 4.249, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6497354497354497, | |
| "grad_norm": 4.567010879516602, | |
| "learning_rate": 1.7566137566137566e-05, | |
| "loss": 4.2096, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6507936507936508, | |
| "grad_norm": 7.315486431121826, | |
| "learning_rate": 1.7513227513227516e-05, | |
| "loss": 4.1564, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6518518518518519, | |
| "grad_norm": 5.819315433502197, | |
| "learning_rate": 1.746031746031746e-05, | |
| "loss": 4.6175, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.6529100529100529, | |
| "grad_norm": 3.4257044792175293, | |
| "learning_rate": 1.740740740740741e-05, | |
| "loss": 4.4864, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.653968253968254, | |
| "grad_norm": 4.5550947189331055, | |
| "learning_rate": 1.7354497354497356e-05, | |
| "loss": 4.3435, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.6550264550264551, | |
| "grad_norm": 5.184391498565674, | |
| "learning_rate": 1.7301587301587302e-05, | |
| "loss": 4.294, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.656084656084656, | |
| "grad_norm": 5.654160022735596, | |
| "learning_rate": 1.724867724867725e-05, | |
| "loss": 4.3935, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6571428571428571, | |
| "grad_norm": 5.634194850921631, | |
| "learning_rate": 1.7195767195767195e-05, | |
| "loss": 4.0203, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.6582010582010582, | |
| "grad_norm": 5.462583065032959, | |
| "learning_rate": 1.7142857142857145e-05, | |
| "loss": 4.4987, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.6592592592592592, | |
| "grad_norm": 4.333361625671387, | |
| "learning_rate": 1.7089947089947092e-05, | |
| "loss": 4.4802, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.6603174603174603, | |
| "grad_norm": 3.9108376502990723, | |
| "learning_rate": 1.7037037037037038e-05, | |
| "loss": 4.2963, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.6613756613756614, | |
| "grad_norm": 4.688434600830078, | |
| "learning_rate": 1.6984126984126985e-05, | |
| "loss": 4.2159, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6624338624338625, | |
| "grad_norm": 3.797638177871704, | |
| "learning_rate": 1.693121693121693e-05, | |
| "loss": 4.3626, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6634920634920635, | |
| "grad_norm": 3.1089894771575928, | |
| "learning_rate": 1.6878306878306878e-05, | |
| "loss": 4.4314, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6645502645502646, | |
| "grad_norm": 3.5496623516082764, | |
| "learning_rate": 1.6825396825396828e-05, | |
| "loss": 4.4034, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6656084656084656, | |
| "grad_norm": 3.350839138031006, | |
| "learning_rate": 1.6772486772486774e-05, | |
| "loss": 4.4295, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 5.997464179992676, | |
| "learning_rate": 1.671957671957672e-05, | |
| "loss": 4.4228, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6677248677248677, | |
| "grad_norm": 4.045958042144775, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 4.507, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6687830687830688, | |
| "grad_norm": 6.075403690338135, | |
| "learning_rate": 1.6613756613756614e-05, | |
| "loss": 4.4149, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6698412698412698, | |
| "grad_norm": 4.632997512817383, | |
| "learning_rate": 1.6560846560846564e-05, | |
| "loss": 4.2041, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6708994708994709, | |
| "grad_norm": 6.3005499839782715, | |
| "learning_rate": 1.6507936507936507e-05, | |
| "loss": 4.2768, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.671957671957672, | |
| "grad_norm": 3.847238063812256, | |
| "learning_rate": 1.6455026455026457e-05, | |
| "loss": 4.4107, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6730158730158731, | |
| "grad_norm": 3.6848814487457275, | |
| "learning_rate": 1.6402116402116404e-05, | |
| "loss": 4.2626, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.674074074074074, | |
| "grad_norm": 6.838409900665283, | |
| "learning_rate": 1.634920634920635e-05, | |
| "loss": 4.3674, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6751322751322751, | |
| "grad_norm": 6.633561134338379, | |
| "learning_rate": 1.62962962962963e-05, | |
| "loss": 4.626, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6761904761904762, | |
| "grad_norm": 3.2217533588409424, | |
| "learning_rate": 1.6243386243386243e-05, | |
| "loss": 4.4099, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6772486772486772, | |
| "grad_norm": 4.3829145431518555, | |
| "learning_rate": 1.6190476190476193e-05, | |
| "loss": 4.2519, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6783068783068783, | |
| "grad_norm": 4.161040306091309, | |
| "learning_rate": 1.6137566137566136e-05, | |
| "loss": 4.4131, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6793650793650794, | |
| "grad_norm": 5.863997459411621, | |
| "learning_rate": 1.6084656084656086e-05, | |
| "loss": 3.9776, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6804232804232804, | |
| "grad_norm": 4.011388301849365, | |
| "learning_rate": 1.6031746031746033e-05, | |
| "loss": 4.2827, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6814814814814815, | |
| "grad_norm": 5.71457052230835, | |
| "learning_rate": 1.597883597883598e-05, | |
| "loss": 4.1867, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6825396825396826, | |
| "grad_norm": 2.9896559715270996, | |
| "learning_rate": 1.5925925925925926e-05, | |
| "loss": 4.2617, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6835978835978836, | |
| "grad_norm": 3.395284652709961, | |
| "learning_rate": 1.5873015873015872e-05, | |
| "loss": 4.4314, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6846560846560846, | |
| "grad_norm": 5.6781086921691895, | |
| "learning_rate": 1.5820105820105822e-05, | |
| "loss": 4.1137, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 5.346649646759033, | |
| "learning_rate": 1.576719576719577e-05, | |
| "loss": 4.186, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6867724867724868, | |
| "grad_norm": 4.502099990844727, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 4.2691, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6878306878306878, | |
| "grad_norm": 4.361939907073975, | |
| "learning_rate": 1.5661375661375662e-05, | |
| "loss": 4.3178, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6888888888888889, | |
| "grad_norm": 4.989232540130615, | |
| "learning_rate": 1.560846560846561e-05, | |
| "loss": 4.5618, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.68994708994709, | |
| "grad_norm": 4.986234188079834, | |
| "learning_rate": 1.5555555555555555e-05, | |
| "loss": 4.141, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.691005291005291, | |
| "grad_norm": 4.698188781738281, | |
| "learning_rate": 1.5502645502645505e-05, | |
| "loss": 4.2413, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.692063492063492, | |
| "grad_norm": 3.489686965942383, | |
| "learning_rate": 1.544973544973545e-05, | |
| "loss": 4.1997, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6931216931216931, | |
| "grad_norm": 5.456641674041748, | |
| "learning_rate": 1.5396825396825398e-05, | |
| "loss": 3.8636, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6941798941798942, | |
| "grad_norm": 5.964907169342041, | |
| "learning_rate": 1.5343915343915344e-05, | |
| "loss": 3.9141, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6952380952380952, | |
| "grad_norm": 4.022069931030273, | |
| "learning_rate": 1.529100529100529e-05, | |
| "loss": 4.222, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6962962962962963, | |
| "grad_norm": 4.964052677154541, | |
| "learning_rate": 1.5238095238095241e-05, | |
| "loss": 4.4431, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6973544973544974, | |
| "grad_norm": 4.9138712882995605, | |
| "learning_rate": 1.5185185185185186e-05, | |
| "loss": 4.3141, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.6984126984126984, | |
| "grad_norm": 5.288991928100586, | |
| "learning_rate": 1.5132275132275134e-05, | |
| "loss": 4.29, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6994708994708995, | |
| "grad_norm": 2.88527512550354, | |
| "learning_rate": 1.5079365079365079e-05, | |
| "loss": 4.4974, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7005291005291006, | |
| "grad_norm": 3.6969175338745117, | |
| "learning_rate": 1.5026455026455027e-05, | |
| "loss": 4.3049, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7015873015873015, | |
| "grad_norm": 3.2847836017608643, | |
| "learning_rate": 1.4973544973544974e-05, | |
| "loss": 4.1501, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7026455026455026, | |
| "grad_norm": 4.742918014526367, | |
| "learning_rate": 1.4920634920634922e-05, | |
| "loss": 4.1301, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7037037037037037, | |
| "grad_norm": 6.213130474090576, | |
| "learning_rate": 1.486772486772487e-05, | |
| "loss": 3.9858, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7047619047619048, | |
| "grad_norm": 8.191361427307129, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 5.2602, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7058201058201058, | |
| "grad_norm": 5.620320796966553, | |
| "learning_rate": 1.4761904761904763e-05, | |
| "loss": 4.3565, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7068783068783069, | |
| "grad_norm": 3.7786309719085693, | |
| "learning_rate": 1.4708994708994708e-05, | |
| "loss": 4.3774, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.707936507936508, | |
| "grad_norm": 6.146083831787109, | |
| "learning_rate": 1.4656084656084656e-05, | |
| "loss": 4.4039, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.708994708994709, | |
| "grad_norm": 4.173206329345703, | |
| "learning_rate": 1.4603174603174605e-05, | |
| "loss": 4.495, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.71005291005291, | |
| "grad_norm": 4.497081279754639, | |
| "learning_rate": 1.455026455026455e-05, | |
| "loss": 4.2545, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 6.875660419464111, | |
| "learning_rate": 1.4497354497354498e-05, | |
| "loss": 4.1121, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.7121693121693121, | |
| "grad_norm": 5.249729633331299, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 4.4577, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7132275132275132, | |
| "grad_norm": 6.506221294403076, | |
| "learning_rate": 1.4391534391534392e-05, | |
| "loss": 4.5862, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 3.437570571899414, | |
| "learning_rate": 1.433862433862434e-05, | |
| "loss": 4.2684, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7153439153439154, | |
| "grad_norm": 3.463667631149292, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 4.5315, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7164021164021164, | |
| "grad_norm": 5.436329364776611, | |
| "learning_rate": 1.4232804232804234e-05, | |
| "loss": 4.5016, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7174603174603175, | |
| "grad_norm": 3.9234187602996826, | |
| "learning_rate": 1.4179894179894179e-05, | |
| "loss": 4.5041, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7185185185185186, | |
| "grad_norm": 3.112135410308838, | |
| "learning_rate": 1.4126984126984127e-05, | |
| "loss": 4.3273, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7195767195767195, | |
| "grad_norm": 3.713372230529785, | |
| "learning_rate": 1.4074074074074075e-05, | |
| "loss": 4.3923, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7206349206349206, | |
| "grad_norm": 5.0203375816345215, | |
| "learning_rate": 1.4021164021164022e-05, | |
| "loss": 4.4324, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7216931216931217, | |
| "grad_norm": 5.588266849517822, | |
| "learning_rate": 1.396825396825397e-05, | |
| "loss": 4.6234, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7227513227513227, | |
| "grad_norm": 6.488243103027344, | |
| "learning_rate": 1.3915343915343915e-05, | |
| "loss": 4.9581, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7238095238095238, | |
| "grad_norm": 5.537155628204346, | |
| "learning_rate": 1.3862433862433863e-05, | |
| "loss": 4.4419, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7248677248677249, | |
| "grad_norm": 4.607074737548828, | |
| "learning_rate": 1.3809523809523811e-05, | |
| "loss": 4.3356, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.725925925925926, | |
| "grad_norm": 4.534053325653076, | |
| "learning_rate": 1.3756613756613756e-05, | |
| "loss": 4.4978, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.726984126984127, | |
| "grad_norm": 5.670014381408691, | |
| "learning_rate": 1.3703703703703704e-05, | |
| "loss": 4.4208, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.728042328042328, | |
| "grad_norm": 4.900901794433594, | |
| "learning_rate": 1.365079365079365e-05, | |
| "loss": 4.2623, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7291005291005291, | |
| "grad_norm": 5.048198699951172, | |
| "learning_rate": 1.3597883597883599e-05, | |
| "loss": 4.3792, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7301587301587301, | |
| "grad_norm": 3.673882007598877, | |
| "learning_rate": 1.3544973544973547e-05, | |
| "loss": 4.5557, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7312169312169312, | |
| "grad_norm": 4.310447692871094, | |
| "learning_rate": 1.3492063492063492e-05, | |
| "loss": 4.1925, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7322751322751323, | |
| "grad_norm": 3.8118319511413574, | |
| "learning_rate": 1.343915343915344e-05, | |
| "loss": 4.25, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7333333333333333, | |
| "grad_norm": 3.351060628890991, | |
| "learning_rate": 1.3386243386243385e-05, | |
| "loss": 4.4192, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7343915343915344, | |
| "grad_norm": 4.297890663146973, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 4.208, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7354497354497355, | |
| "grad_norm": 5.1479315757751465, | |
| "learning_rate": 1.3280423280423282e-05, | |
| "loss": 4.38, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7365079365079366, | |
| "grad_norm": 5.45789909362793, | |
| "learning_rate": 1.3227513227513228e-05, | |
| "loss": 4.4045, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7375661375661375, | |
| "grad_norm": 2.974043369293213, | |
| "learning_rate": 1.3174603174603176e-05, | |
| "loss": 4.5253, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7386243386243386, | |
| "grad_norm": 5.557780742645264, | |
| "learning_rate": 1.3121693121693121e-05, | |
| "loss": 4.569, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.7396825396825397, | |
| "grad_norm": 3.755084991455078, | |
| "learning_rate": 1.306878306878307e-05, | |
| "loss": 4.3673, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7407407407407407, | |
| "grad_norm": 4.555693626403809, | |
| "learning_rate": 1.3015873015873018e-05, | |
| "loss": 4.2973, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7417989417989418, | |
| "grad_norm": 5.5160651206970215, | |
| "learning_rate": 1.2962962962962962e-05, | |
| "loss": 4.399, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.7428571428571429, | |
| "grad_norm": 4.394107818603516, | |
| "learning_rate": 1.291005291005291e-05, | |
| "loss": 4.3341, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7439153439153439, | |
| "grad_norm": 5.06443452835083, | |
| "learning_rate": 1.2857142857142857e-05, | |
| "loss": 4.1375, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.744973544973545, | |
| "grad_norm": 3.5541141033172607, | |
| "learning_rate": 1.2804232804232805e-05, | |
| "loss": 4.203, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.746031746031746, | |
| "grad_norm": 4.325985431671143, | |
| "learning_rate": 1.2751322751322754e-05, | |
| "loss": 4.5174, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.7470899470899471, | |
| "grad_norm": 2.9800527095794678, | |
| "learning_rate": 1.2698412698412699e-05, | |
| "loss": 4.4557, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.7481481481481481, | |
| "grad_norm": 3.3406500816345215, | |
| "learning_rate": 1.2645502645502647e-05, | |
| "loss": 4.453, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.7492063492063492, | |
| "grad_norm": 5.763829231262207, | |
| "learning_rate": 1.2592592592592592e-05, | |
| "loss": 4.3594, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.7502645502645503, | |
| "grad_norm": 4.623786449432373, | |
| "learning_rate": 1.253968253968254e-05, | |
| "loss": 4.291, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.7513227513227513, | |
| "grad_norm": 4.377042293548584, | |
| "learning_rate": 1.2486772486772486e-05, | |
| "loss": 4.0562, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.7523809523809524, | |
| "grad_norm": 3.437060594558716, | |
| "learning_rate": 1.2433862433862433e-05, | |
| "loss": 4.4313, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.7534391534391535, | |
| "grad_norm": 6.42732572555542, | |
| "learning_rate": 1.2380952380952381e-05, | |
| "loss": 4.3612, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.7544973544973544, | |
| "grad_norm": 5.4876627922058105, | |
| "learning_rate": 1.232804232804233e-05, | |
| "loss": 4.3101, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.7555555555555555, | |
| "grad_norm": 4.041126728057861, | |
| "learning_rate": 1.2275132275132276e-05, | |
| "loss": 4.3653, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.7566137566137566, | |
| "grad_norm": 4.583195209503174, | |
| "learning_rate": 1.2222222222222222e-05, | |
| "loss": 4.164, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.7576719576719577, | |
| "grad_norm": 4.028945446014404, | |
| "learning_rate": 1.2169312169312169e-05, | |
| "loss": 4.2116, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.7587301587301587, | |
| "grad_norm": 6.026222229003906, | |
| "learning_rate": 1.2116402116402117e-05, | |
| "loss": 4.2678, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.7597883597883598, | |
| "grad_norm": 3.280238151550293, | |
| "learning_rate": 1.2063492063492064e-05, | |
| "loss": 4.1293, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.7608465608465609, | |
| "grad_norm": 3.384033203125, | |
| "learning_rate": 1.201058201058201e-05, | |
| "loss": 4.3763, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "grad_norm": 3.618666648864746, | |
| "learning_rate": 1.1957671957671959e-05, | |
| "loss": 4.5046, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.762962962962963, | |
| "grad_norm": 3.716320514678955, | |
| "learning_rate": 1.1904761904761905e-05, | |
| "loss": 4.4088, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.764021164021164, | |
| "grad_norm": 4.554460048675537, | |
| "learning_rate": 1.1851851851851853e-05, | |
| "loss": 4.1846, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.765079365079365, | |
| "grad_norm": 4.681301116943359, | |
| "learning_rate": 1.17989417989418e-05, | |
| "loss": 4.2343, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.7661375661375661, | |
| "grad_norm": 5.567061901092529, | |
| "learning_rate": 1.1746031746031746e-05, | |
| "loss": 3.9467, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.7671957671957672, | |
| "grad_norm": 3.0023434162139893, | |
| "learning_rate": 1.1693121693121693e-05, | |
| "loss": 4.1829, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.7682539682539683, | |
| "grad_norm": 6.105156898498535, | |
| "learning_rate": 1.164021164021164e-05, | |
| "loss": 4.0128, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.7693121693121693, | |
| "grad_norm": 3.2592265605926514, | |
| "learning_rate": 1.1587301587301588e-05, | |
| "loss": 4.4044, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.7703703703703704, | |
| "grad_norm": 4.033621311187744, | |
| "learning_rate": 1.1534391534391536e-05, | |
| "loss": 4.5054, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.7714285714285715, | |
| "grad_norm": 6.191745281219482, | |
| "learning_rate": 1.1481481481481482e-05, | |
| "loss": 4.7064, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7724867724867724, | |
| "grad_norm": 6.648366928100586, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 4.815, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7735449735449735, | |
| "grad_norm": 3.693485736846924, | |
| "learning_rate": 1.1375661375661376e-05, | |
| "loss": 4.1928, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7746031746031746, | |
| "grad_norm": 3.7810678482055664, | |
| "learning_rate": 1.1322751322751324e-05, | |
| "loss": 4.3781, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7756613756613756, | |
| "grad_norm": 4.211301803588867, | |
| "learning_rate": 1.126984126984127e-05, | |
| "loss": 4.1813, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7767195767195767, | |
| "grad_norm": 3.337557792663574, | |
| "learning_rate": 1.1216931216931217e-05, | |
| "loss": 4.3928, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7777777777777778, | |
| "grad_norm": 3.5120623111724854, | |
| "learning_rate": 1.1164021164021163e-05, | |
| "loss": 4.272, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7788359788359789, | |
| "grad_norm": 5.32808780670166, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 4.3989, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7798941798941799, | |
| "grad_norm": 5.039298057556152, | |
| "learning_rate": 1.105820105820106e-05, | |
| "loss": 4.1569, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.780952380952381, | |
| "grad_norm": 4.3160223960876465, | |
| "learning_rate": 1.1005291005291006e-05, | |
| "loss": 4.0202, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.782010582010582, | |
| "grad_norm": 6.317615985870361, | |
| "learning_rate": 1.0952380952380953e-05, | |
| "loss": 4.3689, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.783068783068783, | |
| "grad_norm": 4.654201030731201, | |
| "learning_rate": 1.08994708994709e-05, | |
| "loss": 4.2718, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7841269841269841, | |
| "grad_norm": 4.299462795257568, | |
| "learning_rate": 1.0846560846560846e-05, | |
| "loss": 4.2899, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.7851851851851852, | |
| "grad_norm": 3.5088319778442383, | |
| "learning_rate": 1.0793650793650794e-05, | |
| "loss": 4.453, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7862433862433862, | |
| "grad_norm": 4.868035316467285, | |
| "learning_rate": 1.074074074074074e-05, | |
| "loss": 4.3791, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.7873015873015873, | |
| "grad_norm": 3.0440139770507812, | |
| "learning_rate": 1.0687830687830689e-05, | |
| "loss": 4.2274, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7883597883597884, | |
| "grad_norm": 3.8341310024261475, | |
| "learning_rate": 1.0634920634920636e-05, | |
| "loss": 4.5376, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7894179894179895, | |
| "grad_norm": 4.445629596710205, | |
| "learning_rate": 1.0582010582010582e-05, | |
| "loss": 4.4937, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.7904761904761904, | |
| "grad_norm": 3.684857130050659, | |
| "learning_rate": 1.052910052910053e-05, | |
| "loss": 4.8373, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.7915343915343915, | |
| "grad_norm": 3.8749489784240723, | |
| "learning_rate": 1.0476190476190477e-05, | |
| "loss": 4.5327, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.7925925925925926, | |
| "grad_norm": 3.256950855255127, | |
| "learning_rate": 1.0423280423280423e-05, | |
| "loss": 4.6, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7936507936507936, | |
| "grad_norm": 5.716484069824219, | |
| "learning_rate": 1.037037037037037e-05, | |
| "loss": 4.7403, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7947089947089947, | |
| "grad_norm": 6.667811870574951, | |
| "learning_rate": 1.0317460317460318e-05, | |
| "loss": 4.1508, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.7957671957671958, | |
| "grad_norm": 6.336728572845459, | |
| "learning_rate": 1.0264550264550265e-05, | |
| "loss": 3.9915, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.7968253968253968, | |
| "grad_norm": 3.4443869590759277, | |
| "learning_rate": 1.0211640211640213e-05, | |
| "loss": 4.2412, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.7978835978835979, | |
| "grad_norm": 3.8380494117736816, | |
| "learning_rate": 1.015873015873016e-05, | |
| "loss": 4.1926, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.798941798941799, | |
| "grad_norm": 4.2469401359558105, | |
| "learning_rate": 1.0105820105820106e-05, | |
| "loss": 4.2798, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 3.973093032836914, | |
| "learning_rate": 1.0052910052910053e-05, | |
| "loss": 4.2463, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.801058201058201, | |
| "grad_norm": 3.1232118606567383, | |
| "learning_rate": 1e-05, | |
| "loss": 4.1261, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8021164021164021, | |
| "grad_norm": 6.105686187744141, | |
| "learning_rate": 9.947089947089947e-06, | |
| "loss": 4.2619, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8031746031746032, | |
| "grad_norm": 6.121016502380371, | |
| "learning_rate": 9.894179894179894e-06, | |
| "loss": 4.431, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8042328042328042, | |
| "grad_norm": 6.174698352813721, | |
| "learning_rate": 9.841269841269842e-06, | |
| "loss": 4.3786, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8052910052910053, | |
| "grad_norm": 5.5360941886901855, | |
| "learning_rate": 9.788359788359789e-06, | |
| "loss": 4.2559, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8063492063492064, | |
| "grad_norm": 3.6265597343444824, | |
| "learning_rate": 9.735449735449737e-06, | |
| "loss": 4.4958, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8074074074074075, | |
| "grad_norm": 6.606620788574219, | |
| "learning_rate": 9.682539682539683e-06, | |
| "loss": 4.5506, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8084656084656084, | |
| "grad_norm": 5.32057523727417, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 4.3155, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8095238095238095, | |
| "grad_norm": 3.1966543197631836, | |
| "learning_rate": 9.576719576719577e-06, | |
| "loss": 4.4766, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8105820105820106, | |
| "grad_norm": 4.467657566070557, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 4.4167, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.8116402116402116, | |
| "grad_norm": 3.145953416824341, | |
| "learning_rate": 9.470899470899471e-06, | |
| "loss": 4.2835, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8126984126984127, | |
| "grad_norm": 4.974372386932373, | |
| "learning_rate": 9.41798941798942e-06, | |
| "loss": 4.5889, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8137566137566138, | |
| "grad_norm": 3.7245638370513916, | |
| "learning_rate": 9.365079365079366e-06, | |
| "loss": 4.1537, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8148148148148148, | |
| "grad_norm": 3.2511162757873535, | |
| "learning_rate": 9.312169312169313e-06, | |
| "loss": 4.4419, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8158730158730159, | |
| "grad_norm": 6.026091575622559, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 4.209, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.816931216931217, | |
| "grad_norm": 3.5133731365203857, | |
| "learning_rate": 9.206349206349207e-06, | |
| "loss": 4.3719, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.817989417989418, | |
| "grad_norm": 4.241281509399414, | |
| "learning_rate": 9.153439153439154e-06, | |
| "loss": 4.5862, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.819047619047619, | |
| "grad_norm": 4.5300517082214355, | |
| "learning_rate": 9.1005291005291e-06, | |
| "loss": 4.4843, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8201058201058201, | |
| "grad_norm": 5.496360778808594, | |
| "learning_rate": 9.047619047619047e-06, | |
| "loss": 4.2999, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8211640211640212, | |
| "grad_norm": 3.988664388656616, | |
| "learning_rate": 8.994708994708995e-06, | |
| "loss": 4.2362, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.8222222222222222, | |
| "grad_norm": 3.7009782791137695, | |
| "learning_rate": 8.941798941798943e-06, | |
| "loss": 4.2936, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8232804232804233, | |
| "grad_norm": 5.037206649780273, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 4.3659, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8243386243386244, | |
| "grad_norm": 3.1383752822875977, | |
| "learning_rate": 8.835978835978837e-06, | |
| "loss": 4.4558, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.8253968253968254, | |
| "grad_norm": 3.314852714538574, | |
| "learning_rate": 8.783068783068783e-06, | |
| "loss": 4.556, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8264550264550264, | |
| "grad_norm": 3.957002878189087, | |
| "learning_rate": 8.73015873015873e-06, | |
| "loss": 4.4447, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8275132275132275, | |
| "grad_norm": 3.4280648231506348, | |
| "learning_rate": 8.677248677248678e-06, | |
| "loss": 4.245, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.8285714285714286, | |
| "grad_norm": 3.471327304840088, | |
| "learning_rate": 8.624338624338624e-06, | |
| "loss": 4.4841, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8296296296296296, | |
| "grad_norm": 4.864382743835449, | |
| "learning_rate": 8.571428571428573e-06, | |
| "loss": 4.2108, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8306878306878307, | |
| "grad_norm": 5.851284027099609, | |
| "learning_rate": 8.518518518518519e-06, | |
| "loss": 4.0281, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8317460317460318, | |
| "grad_norm": 4.928969860076904, | |
| "learning_rate": 8.465608465608466e-06, | |
| "loss": 4.2632, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8328042328042328, | |
| "grad_norm": 3.591388702392578, | |
| "learning_rate": 8.412698412698414e-06, | |
| "loss": 4.3222, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8338624338624339, | |
| "grad_norm": 3.6822257041931152, | |
| "learning_rate": 8.35978835978836e-06, | |
| "loss": 4.687, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.834920634920635, | |
| "grad_norm": 3.3557658195495605, | |
| "learning_rate": 8.306878306878307e-06, | |
| "loss": 4.3424, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8359788359788359, | |
| "grad_norm": 3.654463529586792, | |
| "learning_rate": 8.253968253968254e-06, | |
| "loss": 4.0186, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.837037037037037, | |
| "grad_norm": 4.440916538238525, | |
| "learning_rate": 8.201058201058202e-06, | |
| "loss": 4.1733, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8380952380952381, | |
| "grad_norm": 5.019598007202148, | |
| "learning_rate": 8.14814814814815e-06, | |
| "loss": 4.2378, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.8391534391534392, | |
| "grad_norm": 5.27589225769043, | |
| "learning_rate": 8.095238095238097e-06, | |
| "loss": 4.2217, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.8402116402116402, | |
| "grad_norm": 3.2513492107391357, | |
| "learning_rate": 8.042328042328043e-06, | |
| "loss": 4.2982, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.8412698412698413, | |
| "grad_norm": 3.5115115642547607, | |
| "learning_rate": 7.98941798941799e-06, | |
| "loss": 4.2832, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.8423280423280424, | |
| "grad_norm": 4.6819000244140625, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 4.3657, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.8433862433862434, | |
| "grad_norm": 4.580070972442627, | |
| "learning_rate": 7.883597883597884e-06, | |
| "loss": 4.3363, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.8444444444444444, | |
| "grad_norm": 4.679523468017578, | |
| "learning_rate": 7.830687830687831e-06, | |
| "loss": 4.2533, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.8455026455026455, | |
| "grad_norm": 3.7114503383636475, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 4.2543, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.8465608465608465, | |
| "grad_norm": 3.6227071285247803, | |
| "learning_rate": 7.724867724867726e-06, | |
| "loss": 4.3794, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8476190476190476, | |
| "grad_norm": 3.9101266860961914, | |
| "learning_rate": 7.671957671957672e-06, | |
| "loss": 4.4385, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.8486772486772487, | |
| "grad_norm": 4.538809299468994, | |
| "learning_rate": 7.6190476190476205e-06, | |
| "loss": 4.1798, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.8497354497354498, | |
| "grad_norm": 3.392962694168091, | |
| "learning_rate": 7.566137566137567e-06, | |
| "loss": 4.3668, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.8507936507936508, | |
| "grad_norm": 3.7657711505889893, | |
| "learning_rate": 7.5132275132275136e-06, | |
| "loss": 4.2039, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.8518518518518519, | |
| "grad_norm": 4.12888240814209, | |
| "learning_rate": 7.460317460317461e-06, | |
| "loss": 4.4272, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.852910052910053, | |
| "grad_norm": 4.325788974761963, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 4.3938, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.8539682539682539, | |
| "grad_norm": 6.836640357971191, | |
| "learning_rate": 7.354497354497354e-06, | |
| "loss": 4.481, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.855026455026455, | |
| "grad_norm": 3.2702300548553467, | |
| "learning_rate": 7.301587301587302e-06, | |
| "loss": 4.2527, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.8560846560846561, | |
| "grad_norm": 3.6480207443237305, | |
| "learning_rate": 7.248677248677249e-06, | |
| "loss": 4.3472, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 3.0763864517211914, | |
| "learning_rate": 7.195767195767196e-06, | |
| "loss": 4.1348, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8582010582010582, | |
| "grad_norm": 4.69089937210083, | |
| "learning_rate": 7.142857142857143e-06, | |
| "loss": 4.4034, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.8592592592592593, | |
| "grad_norm": 6.656426429748535, | |
| "learning_rate": 7.089947089947089e-06, | |
| "loss": 4.4632, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.8603174603174604, | |
| "grad_norm": 5.9208760261535645, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 4.4209, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.8613756613756614, | |
| "grad_norm": 5.307591438293457, | |
| "learning_rate": 6.984126984126985e-06, | |
| "loss": 4.3608, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.8624338624338624, | |
| "grad_norm": 3.104219675064087, | |
| "learning_rate": 6.931216931216931e-06, | |
| "loss": 4.2559, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.8634920634920635, | |
| "grad_norm": 5.242783069610596, | |
| "learning_rate": 6.878306878306878e-06, | |
| "loss": 4.426, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.8645502645502645, | |
| "grad_norm": 3.6152803897857666, | |
| "learning_rate": 6.825396825396825e-06, | |
| "loss": 4.2805, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.8656084656084656, | |
| "grad_norm": 5.046814918518066, | |
| "learning_rate": 6.7724867724867736e-06, | |
| "loss": 4.2196, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.8666666666666667, | |
| "grad_norm": 4.008995532989502, | |
| "learning_rate": 6.71957671957672e-06, | |
| "loss": 4.3369, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.8677248677248677, | |
| "grad_norm": 3.5504777431488037, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 4.2037, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.8687830687830688, | |
| "grad_norm": 5.669442653656006, | |
| "learning_rate": 6.613756613756614e-06, | |
| "loss": 4.2309, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.8698412698412699, | |
| "grad_norm": 5.117327690124512, | |
| "learning_rate": 6.5608465608465606e-06, | |
| "loss": 4.3519, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.870899470899471, | |
| "grad_norm": 4.981729507446289, | |
| "learning_rate": 6.507936507936509e-06, | |
| "loss": 4.01, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.8719576719576719, | |
| "grad_norm": 3.664243459701538, | |
| "learning_rate": 6.455026455026455e-06, | |
| "loss": 4.2071, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.873015873015873, | |
| "grad_norm": 3.5531766414642334, | |
| "learning_rate": 6.402116402116403e-06, | |
| "loss": 4.4418, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.8740740740740741, | |
| "grad_norm": 3.976097583770752, | |
| "learning_rate": 6.349206349206349e-06, | |
| "loss": 4.3688, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.8751322751322751, | |
| "grad_norm": 3.7528669834136963, | |
| "learning_rate": 6.296296296296296e-06, | |
| "loss": 4.3587, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.8761904761904762, | |
| "grad_norm": 5.616343021392822, | |
| "learning_rate": 6.243386243386243e-06, | |
| "loss": 4.4252, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.8772486772486773, | |
| "grad_norm": 5.017111301422119, | |
| "learning_rate": 6.190476190476191e-06, | |
| "loss": 4.2342, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.8783068783068783, | |
| "grad_norm": 4.545386791229248, | |
| "learning_rate": 6.137566137566138e-06, | |
| "loss": 4.1429, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8793650793650793, | |
| "grad_norm": 5.04618501663208, | |
| "learning_rate": 6.0846560846560845e-06, | |
| "loss": 4.3157, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.8804232804232804, | |
| "grad_norm": 5.4261884689331055, | |
| "learning_rate": 6.031746031746032e-06, | |
| "loss": 4.1527, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.8814814814814815, | |
| "grad_norm": 4.209038734436035, | |
| "learning_rate": 5.978835978835979e-06, | |
| "loss": 4.1613, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.8825396825396825, | |
| "grad_norm": 3.330681562423706, | |
| "learning_rate": 5.925925925925927e-06, | |
| "loss": 4.3347, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.8835978835978836, | |
| "grad_norm": 4.095284938812256, | |
| "learning_rate": 5.873015873015873e-06, | |
| "loss": 4.4005, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8846560846560847, | |
| "grad_norm": 5.182604789733887, | |
| "learning_rate": 5.82010582010582e-06, | |
| "loss": 4.0961, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.8857142857142857, | |
| "grad_norm": 4.743280410766602, | |
| "learning_rate": 5.767195767195768e-06, | |
| "loss": 4.4677, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.8867724867724868, | |
| "grad_norm": 5.909420967102051, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 4.0023, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.8878306878306879, | |
| "grad_norm": 4.317636966705322, | |
| "learning_rate": 5.661375661375662e-06, | |
| "loss": 4.2847, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 4.787182331085205, | |
| "learning_rate": 5.6084656084656084e-06, | |
| "loss": 4.3972, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8899470899470899, | |
| "grad_norm": 4.967015266418457, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 4.23, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.891005291005291, | |
| "grad_norm": 3.515166997909546, | |
| "learning_rate": 5.502645502645503e-06, | |
| "loss": 4.5061, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.8920634920634921, | |
| "grad_norm": 5.033231258392334, | |
| "learning_rate": 5.44973544973545e-06, | |
| "loss": 4.416, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.8931216931216931, | |
| "grad_norm": 3.4822046756744385, | |
| "learning_rate": 5.396825396825397e-06, | |
| "loss": 4.345, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.8941798941798942, | |
| "grad_norm": 4.365127086639404, | |
| "learning_rate": 5.3439153439153445e-06, | |
| "loss": 4.2098, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8952380952380953, | |
| "grad_norm": 2.938222646713257, | |
| "learning_rate": 5.291005291005291e-06, | |
| "loss": 4.1394, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.8962962962962963, | |
| "grad_norm": 3.7429540157318115, | |
| "learning_rate": 5.2380952380952384e-06, | |
| "loss": 4.3739, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.8973544973544973, | |
| "grad_norm": 5.029417991638184, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 4.4277, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.8984126984126984, | |
| "grad_norm": 3.043536424636841, | |
| "learning_rate": 5.132275132275132e-06, | |
| "loss": 4.1825, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.8994708994708994, | |
| "grad_norm": 2.5307250022888184, | |
| "learning_rate": 5.07936507936508e-06, | |
| "loss": 4.4159, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.9005291005291005, | |
| "grad_norm": 4.139734268188477, | |
| "learning_rate": 5.026455026455026e-06, | |
| "loss": 4.4281, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9015873015873016, | |
| "grad_norm": 4.402191638946533, | |
| "learning_rate": 4.973544973544974e-06, | |
| "loss": 4.2871, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9026455026455027, | |
| "grad_norm": 3.3426613807678223, | |
| "learning_rate": 4.920634920634921e-06, | |
| "loss": 4.2474, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9037037037037037, | |
| "grad_norm": 3.7858426570892334, | |
| "learning_rate": 4.8677248677248685e-06, | |
| "loss": 4.2868, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9047619047619048, | |
| "grad_norm": 4.034852981567383, | |
| "learning_rate": 4.814814814814815e-06, | |
| "loss": 3.9792, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9058201058201059, | |
| "grad_norm": 3.4261882305145264, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 4.1932, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9068783068783068, | |
| "grad_norm": 4.608288288116455, | |
| "learning_rate": 4.70899470899471e-06, | |
| "loss": 4.165, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.9079365079365079, | |
| "grad_norm": 3.5496902465820312, | |
| "learning_rate": 4.656084656084656e-06, | |
| "loss": 4.5227, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.908994708994709, | |
| "grad_norm": 5.273512840270996, | |
| "learning_rate": 4.603174603174604e-06, | |
| "loss": 4.2818, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.91005291005291, | |
| "grad_norm": 5.324268817901611, | |
| "learning_rate": 4.55026455026455e-06, | |
| "loss": 4.3419, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9111111111111111, | |
| "grad_norm": 3.7706358432769775, | |
| "learning_rate": 4.497354497354498e-06, | |
| "loss": 4.1445, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9121693121693122, | |
| "grad_norm": 3.275144100189209, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 4.4849, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9132275132275133, | |
| "grad_norm": 3.1599481105804443, | |
| "learning_rate": 4.3915343915343915e-06, | |
| "loss": 4.2842, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 4.762506484985352, | |
| "learning_rate": 4.338624338624339e-06, | |
| "loss": 3.9712, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9153439153439153, | |
| "grad_norm": 3.7932703495025635, | |
| "learning_rate": 4.285714285714286e-06, | |
| "loss": 4.3115, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9164021164021164, | |
| "grad_norm": 4.2084736824035645, | |
| "learning_rate": 4.232804232804233e-06, | |
| "loss": 4.294, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9174603174603174, | |
| "grad_norm": 3.4369008541107178, | |
| "learning_rate": 4.17989417989418e-06, | |
| "loss": 4.3855, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9185185185185185, | |
| "grad_norm": 5.6549177169799805, | |
| "learning_rate": 4.126984126984127e-06, | |
| "loss": 4.3273, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9195767195767196, | |
| "grad_norm": 3.5706095695495605, | |
| "learning_rate": 4.074074074074075e-06, | |
| "loss": 3.9828, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9206349206349206, | |
| "grad_norm": 5.644643306732178, | |
| "learning_rate": 4.0211640211640215e-06, | |
| "loss": 4.1306, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9216931216931217, | |
| "grad_norm": 5.228826522827148, | |
| "learning_rate": 3.968253968253968e-06, | |
| "loss": 4.1117, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9227513227513228, | |
| "grad_norm": 3.1740591526031494, | |
| "learning_rate": 3.9153439153439155e-06, | |
| "loss": 4.4053, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9238095238095239, | |
| "grad_norm": 3.6859006881713867, | |
| "learning_rate": 3.862433862433863e-06, | |
| "loss": 4.3222, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9248677248677248, | |
| "grad_norm": 7.347630023956299, | |
| "learning_rate": 3.8095238095238102e-06, | |
| "loss": 4.3781, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9259259259259259, | |
| "grad_norm": 4.983848571777344, | |
| "learning_rate": 3.7566137566137568e-06, | |
| "loss": 4.2057, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.926984126984127, | |
| "grad_norm": 6.287315368652344, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 4.1598, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.928042328042328, | |
| "grad_norm": 5.142515182495117, | |
| "learning_rate": 3.650793650793651e-06, | |
| "loss": 4.5319, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9291005291005291, | |
| "grad_norm": 3.5645596981048584, | |
| "learning_rate": 3.597883597883598e-06, | |
| "loss": 4.3179, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9301587301587302, | |
| "grad_norm": 4.008954048156738, | |
| "learning_rate": 3.5449735449735446e-06, | |
| "loss": 4.0794, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9312169312169312, | |
| "grad_norm": 5.768702030181885, | |
| "learning_rate": 3.4920634920634924e-06, | |
| "loss": 4.5562, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9322751322751323, | |
| "grad_norm": 4.129014015197754, | |
| "learning_rate": 3.439153439153439e-06, | |
| "loss": 4.3659, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 6.950862407684326, | |
| "learning_rate": 3.3862433862433868e-06, | |
| "loss": 4.3704, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9343915343915344, | |
| "grad_norm": 6.936004638671875, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 4.6306, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.9354497354497354, | |
| "grad_norm": 6.682819366455078, | |
| "learning_rate": 3.2804232804232803e-06, | |
| "loss": 4.3953, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.9365079365079365, | |
| "grad_norm": 4.455723762512207, | |
| "learning_rate": 3.2275132275132277e-06, | |
| "loss": 4.3125, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.9375661375661376, | |
| "grad_norm": 5.623180389404297, | |
| "learning_rate": 3.1746031746031746e-06, | |
| "loss": 4.2719, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.9386243386243386, | |
| "grad_norm": 3.4735963344573975, | |
| "learning_rate": 3.1216931216931216e-06, | |
| "loss": 4.4579, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.9396825396825397, | |
| "grad_norm": 3.9385921955108643, | |
| "learning_rate": 3.068783068783069e-06, | |
| "loss": 4.3531, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.9407407407407408, | |
| "grad_norm": 4.1221818923950195, | |
| "learning_rate": 3.015873015873016e-06, | |
| "loss": 4.2012, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.9417989417989417, | |
| "grad_norm": 4.046891212463379, | |
| "learning_rate": 2.9629629629629633e-06, | |
| "loss": 4.4402, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9428571428571428, | |
| "grad_norm": 4.89032506942749, | |
| "learning_rate": 2.91005291005291e-06, | |
| "loss": 4.2284, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.9439153439153439, | |
| "grad_norm": 3.5154287815093994, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 4.0714, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.944973544973545, | |
| "grad_norm": 4.873518466949463, | |
| "learning_rate": 2.8042328042328042e-06, | |
| "loss": 4.2028, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.946031746031746, | |
| "grad_norm": 2.748612403869629, | |
| "learning_rate": 2.7513227513227516e-06, | |
| "loss": 4.3582, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.9470899470899471, | |
| "grad_norm": 3.607381820678711, | |
| "learning_rate": 2.6984126984126986e-06, | |
| "loss": 4.7712, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.9481481481481482, | |
| "grad_norm": 3.9943275451660156, | |
| "learning_rate": 2.6455026455026455e-06, | |
| "loss": 4.3027, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.9492063492063492, | |
| "grad_norm": 4.708794116973877, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 4.3027, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.9502645502645503, | |
| "grad_norm": 3.1916885375976562, | |
| "learning_rate": 2.53968253968254e-06, | |
| "loss": 4.346, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.9513227513227513, | |
| "grad_norm": 4.0273566246032715, | |
| "learning_rate": 2.486772486772487e-06, | |
| "loss": 4.2844, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.9523809523809523, | |
| "grad_norm": 4.110088348388672, | |
| "learning_rate": 2.4338624338624342e-06, | |
| "loss": 4.1634, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9534391534391534, | |
| "grad_norm": 3.984637975692749, | |
| "learning_rate": 2.3809523809523808e-06, | |
| "loss": 4.0886, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.9544973544973545, | |
| "grad_norm": 5.0471391677856445, | |
| "learning_rate": 2.328042328042328e-06, | |
| "loss": 4.3436, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.9555555555555556, | |
| "grad_norm": 2.9655942916870117, | |
| "learning_rate": 2.275132275132275e-06, | |
| "loss": 4.5204, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.9566137566137566, | |
| "grad_norm": 6.42230224609375, | |
| "learning_rate": 2.2222222222222225e-06, | |
| "loss": 4.61, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.9576719576719577, | |
| "grad_norm": 5.822199821472168, | |
| "learning_rate": 2.1693121693121695e-06, | |
| "loss": 4.1584, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.9587301587301588, | |
| "grad_norm": 4.80122709274292, | |
| "learning_rate": 2.1164021164021164e-06, | |
| "loss": 4.2502, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.9597883597883597, | |
| "grad_norm": 6.633864402770996, | |
| "learning_rate": 2.0634920634920634e-06, | |
| "loss": 4.3063, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.9608465608465608, | |
| "grad_norm": 4.278034210205078, | |
| "learning_rate": 2.0105820105820108e-06, | |
| "loss": 4.3158, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.9619047619047619, | |
| "grad_norm": 5.122796058654785, | |
| "learning_rate": 1.9576719576719577e-06, | |
| "loss": 4.4717, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.9629629629629629, | |
| "grad_norm": 4.548975467681885, | |
| "learning_rate": 1.9047619047619051e-06, | |
| "loss": 4.356, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.964021164021164, | |
| "grad_norm": 5.9967827796936035, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 4.062, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.9650793650793651, | |
| "grad_norm": 2.504676580429077, | |
| "learning_rate": 1.798941798941799e-06, | |
| "loss": 4.2251, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.9661375661375662, | |
| "grad_norm": 3.5260939598083496, | |
| "learning_rate": 1.7460317460317462e-06, | |
| "loss": 4.3478, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.9671957671957672, | |
| "grad_norm": 4.228456974029541, | |
| "learning_rate": 1.6931216931216934e-06, | |
| "loss": 4.2843, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.9682539682539683, | |
| "grad_norm": 3.96049165725708, | |
| "learning_rate": 1.6402116402116401e-06, | |
| "loss": 4.4371, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.9693121693121693, | |
| "grad_norm": 4.0121846199035645, | |
| "learning_rate": 1.5873015873015873e-06, | |
| "loss": 4.1606, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.9703703703703703, | |
| "grad_norm": 4.30086612701416, | |
| "learning_rate": 1.5343915343915345e-06, | |
| "loss": 4.3627, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.9714285714285714, | |
| "grad_norm": 3.6604509353637695, | |
| "learning_rate": 1.4814814814814817e-06, | |
| "loss": 4.4242, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.9724867724867725, | |
| "grad_norm": 4.073081970214844, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 4.1937, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.9735449735449735, | |
| "grad_norm": 5.492738246917725, | |
| "learning_rate": 1.3756613756613758e-06, | |
| "loss": 4.0173, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9746031746031746, | |
| "grad_norm": 3.94403076171875, | |
| "learning_rate": 1.3227513227513228e-06, | |
| "loss": 4.321, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.9756613756613757, | |
| "grad_norm": 4.628042697906494, | |
| "learning_rate": 1.26984126984127e-06, | |
| "loss": 4.2686, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.9767195767195768, | |
| "grad_norm": 7.038707733154297, | |
| "learning_rate": 1.2169312169312171e-06, | |
| "loss": 4.0722, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 3.864443778991699, | |
| "learning_rate": 1.164021164021164e-06, | |
| "loss": 4.2553, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.9788359788359788, | |
| "grad_norm": 3.2768311500549316, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 4.1619, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.9798941798941799, | |
| "grad_norm": 6.034976482391357, | |
| "learning_rate": 1.0582010582010582e-06, | |
| "loss": 4.2766, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.9809523809523809, | |
| "grad_norm": 3.5577170848846436, | |
| "learning_rate": 1.0052910052910054e-06, | |
| "loss": 4.2038, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.982010582010582, | |
| "grad_norm": 3.4226174354553223, | |
| "learning_rate": 9.523809523809526e-07, | |
| "loss": 4.2256, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.9830687830687831, | |
| "grad_norm": 3.801210880279541, | |
| "learning_rate": 8.994708994708995e-07, | |
| "loss": 4.6662, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.9841269841269841, | |
| "grad_norm": 3.7857511043548584, | |
| "learning_rate": 8.465608465608467e-07, | |
| "loss": 4.349, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.9851851851851852, | |
| "grad_norm": 4.348080635070801, | |
| "learning_rate": 7.936507936507937e-07, | |
| "loss": 4.2466, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.9862433862433863, | |
| "grad_norm": 3.3915085792541504, | |
| "learning_rate": 7.407407407407408e-07, | |
| "loss": 4.4284, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.9873015873015873, | |
| "grad_norm": 5.1464972496032715, | |
| "learning_rate": 6.878306878306879e-07, | |
| "loss": 4.2641, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.9883597883597883, | |
| "grad_norm": 4.361679553985596, | |
| "learning_rate": 6.34920634920635e-07, | |
| "loss": 4.1923, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.9894179894179894, | |
| "grad_norm": 4.495355606079102, | |
| "learning_rate": 5.82010582010582e-07, | |
| "loss": 4.1683, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.9904761904761905, | |
| "grad_norm": 4.874444484710693, | |
| "learning_rate": 5.291005291005291e-07, | |
| "loss": 4.2167, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.9915343915343915, | |
| "grad_norm": 3.7013728618621826, | |
| "learning_rate": 4.761904761904763e-07, | |
| "loss": 4.0992, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.9925925925925926, | |
| "grad_norm": 6.150048732757568, | |
| "learning_rate": 4.2328042328042335e-07, | |
| "loss": 4.2488, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.9936507936507937, | |
| "grad_norm": 5.740020751953125, | |
| "learning_rate": 3.703703703703704e-07, | |
| "loss": 4.4361, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.9947089947089947, | |
| "grad_norm": 5.7900800704956055, | |
| "learning_rate": 3.174603174603175e-07, | |
| "loss": 4.5688, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9957671957671957, | |
| "grad_norm": 3.77296781539917, | |
| "learning_rate": 2.6455026455026455e-07, | |
| "loss": 4.3662, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.9968253968253968, | |
| "grad_norm": 4.104562759399414, | |
| "learning_rate": 2.1164021164021167e-07, | |
| "loss": 4.3561, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.9978835978835979, | |
| "grad_norm": 5.027563095092773, | |
| "learning_rate": 1.5873015873015874e-07, | |
| "loss": 4.3309, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.9989417989417989, | |
| "grad_norm": 4.270631313323975, | |
| "learning_rate": 1.0582010582010584e-07, | |
| "loss": 4.3465, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 4.4612956047058105, | |
| "learning_rate": 5.291005291005292e-08, | |
| "loss": 4.3776, | |
| "step": 945 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 945, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1382850220916736.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |